From 0d446e6103c7d746f6076e3191d89ee3392c6017 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Mon, 12 Jun 2023 10:28:43 +0200 Subject: [PATCH 001/583] Develop is now preparing 1.7.0. --- CMakeLists.txt | 3 ++- examples/adaptiveprecision-blockjacobi/CMakeLists.txt | 2 +- examples/build-setup.sh | 2 +- examples/cb-gmres/CMakeLists.txt | 2 +- examples/custom-logger/CMakeLists.txt | 2 +- examples/custom-matrix-format/CMakeLists.txt | 2 +- examples/custom-stopping-criterion/CMakeLists.txt | 2 +- examples/ginkgo-overhead/CMakeLists.txt | 2 +- examples/ginkgo-ranges/CMakeLists.txt | 2 +- examples/heat-equation/CMakeLists.txt | 2 +- examples/ilu-preconditioned-solver/CMakeLists.txt | 2 +- examples/inverse-iteration/CMakeLists.txt | 2 +- examples/ir-ilu-preconditioned-solver/CMakeLists.txt | 2 +- examples/iterative-refinement/CMakeLists.txt | 2 +- examples/kokkos_assembly/CMakeLists.txt | 2 +- examples/minimal-cuda-solver/CMakeLists.txt | 2 +- examples/mixed-multigrid-preconditioned-solver/CMakeLists.txt | 2 +- examples/mixed-multigrid-solver/CMakeLists.txt | 2 +- examples/mixed-precision-ir/CMakeLists.txt | 2 +- examples/mixed-spmv/CMakeLists.txt | 2 +- .../multigrid-preconditioned-solver-customized/CMakeLists.txt | 2 +- examples/multigrid-preconditioned-solver/CMakeLists.txt | 2 +- examples/nine-pt-stencil-solver/CMakeLists.txt | 2 +- examples/papi-logging/CMakeLists.txt | 2 +- examples/par-ilu-convergence/CMakeLists.txt | 2 +- examples/performance-debugging/CMakeLists.txt | 2 +- examples/poisson-solver/CMakeLists.txt | 2 +- examples/preconditioned-solver/CMakeLists.txt | 2 +- examples/preconditioner-export/CMakeLists.txt | 2 +- examples/schroedinger-splitting/CMakeLists.txt | 2 +- examples/simple-solver-logging/CMakeLists.txt | 2 +- examples/simple-solver/CMakeLists.txt | 2 +- examples/three-pt-stencil-solver/CMakeLists.txt | 2 +- 33 files changed, 34 insertions(+), 33 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d7940e7f40b..df6f0ffb89a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,8 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) cmake_policy(SET CMP0104 OLD) endif() -project(Ginkgo LANGUAGES C CXX VERSION 1.6.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures") + +project(Ginkgo LANGUAGES C CXX VERSION 1.7.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures") set(Ginkgo_VERSION_TAG "master") set(PROJECT_VERSION_TAG ${Ginkgo_VERSION_TAG}) diff --git a/examples/adaptiveprecision-blockjacobi/CMakeLists.txt b/examples/adaptiveprecision-blockjacobi/CMakeLists.txt index b121e201c77..744df84a74b 100644 --- a/examples/adaptiveprecision-blockjacobi/CMakeLists.txt +++ b/examples/adaptiveprecision-blockjacobi/CMakeLists.txt @@ -3,7 +3,7 @@ project(adaptiveprecision-blockjacobi) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(adaptiveprecision-blockjacobi adaptiveprecision-blockjacobi.cpp) diff --git a/examples/build-setup.sh b/examples/build-setup.sh index f7a14a0d0a6..a0c947e433b 100644 --- a/examples/build-setup.sh +++ b/examples/build-setup.sh @@ -3,7 +3,7 @@ # copy libraries LIBRARY_NAMES="ginkgo ginkgo_reference ginkgo_omp ginkgo_cuda ginkgo_hip ginkgo_dpcpp ginkgo_device" SUFFIXES=".so .dylib .dll d.so d.dylib d.dll" -VERSION="1.6.0" +VERSION="1.7.0" for name in ${LIBRARY_NAMES}; do for suffix in ${SUFFIXES}; do cp 
${BUILD_DIR}/lib/lib${name}${suffix}.${VERSION} \ diff --git a/examples/cb-gmres/CMakeLists.txt b/examples/cb-gmres/CMakeLists.txt index 97321c8ccbc..d616b16c882 100644 --- a/examples/cb-gmres/CMakeLists.txt +++ b/examples/cb-gmres/CMakeLists.txt @@ -3,7 +3,7 @@ project(cb-gmres) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(cb-gmres cb-gmres.cpp) diff --git a/examples/custom-logger/CMakeLists.txt b/examples/custom-logger/CMakeLists.txt index 1d0c8bcf9ad..f986dd52e76 100644 --- a/examples/custom-logger/CMakeLists.txt +++ b/examples/custom-logger/CMakeLists.txt @@ -3,7 +3,7 @@ project(custom-logger) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(custom-logger custom-logger.cpp) diff --git a/examples/custom-matrix-format/CMakeLists.txt b/examples/custom-matrix-format/CMakeLists.txt index c357572edea..47eeda0143c 100644 --- a/examples/custom-matrix-format/CMakeLists.txt +++ b/examples/custom-matrix-format/CMakeLists.txt @@ -3,7 +3,7 @@ project(custom-matrix-format CXX CUDA) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) find_package(OpenMP 3.0 REQUIRED) endif() diff --git a/examples/custom-stopping-criterion/CMakeLists.txt b/examples/custom-stopping-criterion/CMakeLists.txt index 79b7b9aaab5..811baa59a9c 100644 --- a/examples/custom-stopping-criterion/CMakeLists.txt +++ b/examples/custom-stopping-criterion/CMakeLists.txt @@ -3,7 +3,7 @@ project(custom-stopping-criterion) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) endif() diff --git a/examples/ginkgo-overhead/CMakeLists.txt b/examples/ginkgo-overhead/CMakeLists.txt index 5afbc22c731..fcd7a81c230 100644 --- a/examples/ginkgo-overhead/CMakeLists.txt +++ b/examples/ginkgo-overhead/CMakeLists.txt @@ -3,7 +3,7 @@ project(ginkgo-overhead) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(ginkgo-overhead ginkgo-overhead.cpp) diff --git a/examples/ginkgo-ranges/CMakeLists.txt b/examples/ginkgo-ranges/CMakeLists.txt index de86438d62b..6e30c4f9af4 100644 --- a/examples/ginkgo-ranges/CMakeLists.txt +++ b/examples/ginkgo-ranges/CMakeLists.txt @@ -3,7 +3,7 @@ project(ginkgo-ranges) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(ginkgo-ranges ginkgo-ranges.cpp) target_link_libraries(ginkgo-ranges Ginkgo::ginkgo) diff --git a/examples/heat-equation/CMakeLists.txt b/examples/heat-equation/CMakeLists.txt index 3b0cfc57cb0..f4790edaa8d 100644 --- a/examples/heat-equation/CMakeLists.txt +++ b/examples/heat-equation/CMakeLists.txt @@ -3,7 +3,7 @@ project(heat-equation) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() 
find_package(OpenCV REQUIRED) diff --git a/examples/ilu-preconditioned-solver/CMakeLists.txt b/examples/ilu-preconditioned-solver/CMakeLists.txt index 85daf54923a..e6c840f38f8 100644 --- a/examples/ilu-preconditioned-solver/CMakeLists.txt +++ b/examples/ilu-preconditioned-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(ilu-preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(ilu-preconditioned-solver ilu-preconditioned-solver.cpp) diff --git a/examples/inverse-iteration/CMakeLists.txt b/examples/inverse-iteration/CMakeLists.txt index fa1d17e55c4..deb72accffd 100644 --- a/examples/inverse-iteration/CMakeLists.txt +++ b/examples/inverse-iteration/CMakeLists.txt @@ -3,7 +3,7 @@ project(inverse-iteration) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(inverse-iteration inverse-iteration.cpp) diff --git a/examples/ir-ilu-preconditioned-solver/CMakeLists.txt b/examples/ir-ilu-preconditioned-solver/CMakeLists.txt index c1424429636..fc1205fbd0d 100644 --- a/examples/ir-ilu-preconditioned-solver/CMakeLists.txt +++ b/examples/ir-ilu-preconditioned-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(ir-ilu-preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(ir-ilu-preconditioned-solver ir-ilu-preconditioned-solver.cpp) diff --git a/examples/iterative-refinement/CMakeLists.txt b/examples/iterative-refinement/CMakeLists.txt index 39a2651a90d..fe94a94455b 100644 --- a/examples/iterative-refinement/CMakeLists.txt +++ b/examples/iterative-refinement/CMakeLists.txt @@ -3,7 +3,7 @@ project(iterative-refinement) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(iterative-refinement iterative-refinement.cpp) diff --git a/examples/kokkos_assembly/CMakeLists.txt b/examples/kokkos_assembly/CMakeLists.txt index e6f214e68e2..bfee201c91d 100644 --- a/examples/kokkos_assembly/CMakeLists.txt +++ b/examples/kokkos_assembly/CMakeLists.txt @@ -3,7 +3,7 @@ project(kokkos-assembly CXX) # We only need to find Ginkgo if we build this example stand-alone if(NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() find_package(Kokkos REQUIRED) diff --git a/examples/minimal-cuda-solver/CMakeLists.txt b/examples/minimal-cuda-solver/CMakeLists.txt index 52aa56b60fc..3add4bb30ad 100644 --- a/examples/minimal-cuda-solver/CMakeLists.txt +++ b/examples/minimal-cuda-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(minimal-cuda-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(minimal-cuda-solver minimal-cuda-solver.cpp) diff --git a/examples/mixed-multigrid-preconditioned-solver/CMakeLists.txt b/examples/mixed-multigrid-preconditioned-solver/CMakeLists.txt index 54384f544b7..d710f10f146 100644 --- a/examples/mixed-multigrid-preconditioned-solver/CMakeLists.txt +++ 
b/examples/mixed-multigrid-preconditioned-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(mixed-multigrid-preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(mixed-multigrid-preconditioned-solver mixed-multigrid-preconditioned-solver.cpp) diff --git a/examples/mixed-multigrid-solver/CMakeLists.txt b/examples/mixed-multigrid-solver/CMakeLists.txt index e4ee334e38f..17ec2fa398e 100644 --- a/examples/mixed-multigrid-solver/CMakeLists.txt +++ b/examples/mixed-multigrid-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(mixed-multigrid-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(mixed-multigrid-solver mixed-multigrid-solver.cpp) diff --git a/examples/mixed-precision-ir/CMakeLists.txt b/examples/mixed-precision-ir/CMakeLists.txt index a0a46c0fd6e..01094a5376b 100644 --- a/examples/mixed-precision-ir/CMakeLists.txt +++ b/examples/mixed-precision-ir/CMakeLists.txt @@ -3,7 +3,7 @@ project(mixed-precision-ir) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(mixed-precision-ir mixed-precision-ir.cpp) diff --git a/examples/mixed-spmv/CMakeLists.txt b/examples/mixed-spmv/CMakeLists.txt index ad8e31aad3e..0e4378ca82f 100644 --- a/examples/mixed-spmv/CMakeLists.txt +++ b/examples/mixed-spmv/CMakeLists.txt @@ -3,7 +3,7 @@ project(mixed-spmv) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(mixed-spmv mixed-spmv.cpp) diff --git a/examples/multigrid-preconditioned-solver-customized/CMakeLists.txt b/examples/multigrid-preconditioned-solver-customized/CMakeLists.txt index 4d2b0822d08..411b57b2c83 100644 --- a/examples/multigrid-preconditioned-solver-customized/CMakeLists.txt +++ b/examples/multigrid-preconditioned-solver-customized/CMakeLists.txt @@ -3,7 +3,7 @@ project(multigrid-preconditioned-solver-customized) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(multigrid-preconditioned-solver-customized multigrid-preconditioned-solver-customized.cpp) diff --git a/examples/multigrid-preconditioned-solver/CMakeLists.txt b/examples/multigrid-preconditioned-solver/CMakeLists.txt index af7c296b631..90277398b85 100644 --- a/examples/multigrid-preconditioned-solver/CMakeLists.txt +++ b/examples/multigrid-preconditioned-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(multigrid-preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(multigrid-preconditioned-solver multigrid-preconditioned-solver.cpp) diff --git a/examples/nine-pt-stencil-solver/CMakeLists.txt b/examples/nine-pt-stencil-solver/CMakeLists.txt index d2384129d47..35610ba758a 100644 --- a/examples/nine-pt-stencil-solver/CMakeLists.txt +++ b/examples/nine-pt-stencil-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(nine-pt-stencil-solver) # We only 
need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(nine-pt-stencil-solver nine-pt-stencil-solver.cpp) diff --git a/examples/papi-logging/CMakeLists.txt b/examples/papi-logging/CMakeLists.txt index ac2560f499d..6927675e2ec 100644 --- a/examples/papi-logging/CMakeLists.txt +++ b/examples/papi-logging/CMakeLists.txt @@ -3,7 +3,7 @@ project(papi-logging) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() if (NOT GINKGO_HAVE_PAPI_SDE) diff --git a/examples/par-ilu-convergence/CMakeLists.txt b/examples/par-ilu-convergence/CMakeLists.txt index bee08841173..23b7afd1e75 100644 --- a/examples/par-ilu-convergence/CMakeLists.txt +++ b/examples/par-ilu-convergence/CMakeLists.txt @@ -3,7 +3,7 @@ project(par-ilu-convergence) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(par-ilu-convergence par-ilu-convergence.cpp) diff --git a/examples/performance-debugging/CMakeLists.txt b/examples/performance-debugging/CMakeLists.txt index 4f095e4d1c6..715cd99fe1b 100644 --- a/examples/performance-debugging/CMakeLists.txt +++ b/examples/performance-debugging/CMakeLists.txt @@ -3,7 +3,7 @@ project(performance-debugging) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(performance-debugging performance-debugging.cpp) diff --git a/examples/poisson-solver/CMakeLists.txt b/examples/poisson-solver/CMakeLists.txt index 64e0633ee75..bd5383876d5 100644 --- a/examples/poisson-solver/CMakeLists.txt +++ b/examples/poisson-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(poisson-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(poisson-solver poisson-solver.cpp) diff --git a/examples/preconditioned-solver/CMakeLists.txt b/examples/preconditioned-solver/CMakeLists.txt index b046686243d..a412885f219 100644 --- a/examples/preconditioned-solver/CMakeLists.txt +++ b/examples/preconditioned-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(preconditioned-solver preconditioned-solver.cpp) target_link_libraries(preconditioned-solver Ginkgo::ginkgo) diff --git a/examples/preconditioner-export/CMakeLists.txt b/examples/preconditioner-export/CMakeLists.txt index 1d2156b9d5a..1cfd6d7ff84 100644 --- a/examples/preconditioner-export/CMakeLists.txt +++ b/examples/preconditioner-export/CMakeLists.txt @@ -3,7 +3,7 @@ project(preconditioner-export) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(preconditioner-export preconditioner-export.cpp) diff --git a/examples/schroedinger-splitting/CMakeLists.txt b/examples/schroedinger-splitting/CMakeLists.txt index b7bdece35e8..1e49a1f88b4 100644 
--- a/examples/schroedinger-splitting/CMakeLists.txt +++ b/examples/schroedinger-splitting/CMakeLists.txt @@ -3,7 +3,7 @@ project(schroedinger-splitting) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() find_package(OpenCV REQUIRED) diff --git a/examples/simple-solver-logging/CMakeLists.txt b/examples/simple-solver-logging/CMakeLists.txt index 4092445848a..befead38e7d 100644 --- a/examples/simple-solver-logging/CMakeLists.txt +++ b/examples/simple-solver-logging/CMakeLists.txt @@ -3,7 +3,7 @@ project(simple-solver-logging) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(simple-solver-logging simple-solver-logging.cpp) diff --git a/examples/simple-solver/CMakeLists.txt b/examples/simple-solver/CMakeLists.txt index f505e19729e..dd0faec5f53 100644 --- a/examples/simple-solver/CMakeLists.txt +++ b/examples/simple-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(simple-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(simple-solver simple-solver.cpp) diff --git a/examples/three-pt-stencil-solver/CMakeLists.txt b/examples/three-pt-stencil-solver/CMakeLists.txt index d2941b12976..fc0691dd7c9 100644 --- a/examples/three-pt-stencil-solver/CMakeLists.txt +++ b/examples/three-pt-stencil-solver/CMakeLists.txt @@ -3,7 +3,7 @@ project(three-pt-stencil-solver) # We only need to find Ginkgo if we build this example stand-alone if (NOT GINKGO_BUILD_EXAMPLES) - find_package(Ginkgo 1.6.0 REQUIRED) + find_package(Ginkgo 1.7.0 REQUIRED) endif() add_executable(three-pt-stencil-solver three-pt-stencil-solver.cpp) From a93239103afe92de477b9e82f0f2b34b3e088d62 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 23 Jun 2023 09:53:00 +0200 Subject: [PATCH 002/583] add reorderings to sparse_blas benchmark --- benchmark/sparse_blas/operations.cpp | 126 +++++++++++++++++++++++++- benchmark/sparse_blas/sparse_blas.cpp | 12 ++- 2 files changed, 130 insertions(+), 8 deletions(-) diff --git a/benchmark/sparse_blas/operations.cpp b/benchmark/sparse_blas/operations.cpp index 6a817a67c0d..dc96143ed6d 100644 --- a/benchmark/sparse_blas/operations.cpp +++ b/benchmark/sparse_blas/operations.cpp @@ -695,11 +695,111 @@ class SymbolicCholeskyOperation : public BenchmarkOperation { }; +class ReorderRcmOperation : public BenchmarkOperation { + using reorder_type = gko::reorder::Rcm; + +public: + explicit ReorderRcmOperation(const Mtx* mtx) + : mtx_{mtx->clone()}, + factory_{reorder_type::build().on(mtx->get_executor())} + {} + + std::pair validate() const override + { + // validating RCM correctness is hard, let's leave it out for now + return {true, 0.0}; + } + + gko::size_type get_flops() const override { return 0; } + + gko::size_type get_memory() const override { return 0; } + + void prepare() override {} + + void run() override { reorder_ = factory_->generate(mtx_); } + +private: + std::shared_ptr mtx_; + std::unique_ptr factory_; + std::unique_ptr reorder_; +}; + + +#if GKO_HAVE_METIS + + +class ReorderNestedDissectionOperation : public BenchmarkOperation { + using factory_type = + gko::experimental::reorder::NestedDissection; + using reorder_type = gko::matrix::Permutation; + +public: + explicit 
ReorderNestedDissectionOperation(const Mtx* mtx) + : mtx_{mtx->clone()}, + factory_{factory_type::build().on(mtx->get_executor())} + {} + + std::pair validate() const override + { + // validating ND correctness is hard, let's leave it out for now + return {true, 0.0}; + } + + gko::size_type get_flops() const override { return 0; } + + gko::size_type get_memory() const override { return 0; } + + void prepare() override {} + + void run() override { reorder_ = factory_->generate(mtx_); } + +private: + std::shared_ptr mtx_; + std::unique_ptr factory_; + std::unique_ptr reorder_; +}; + + +#endif + + +class ReorderApproxMinDegOperation : public BenchmarkOperation { + using factory_type = gko::experimental::reorder::Amd; + using reorder_type = gko::matrix::Permutation; + +public: + explicit ReorderApproxMinDegOperation(const Mtx* mtx) + : mtx_{mtx->clone()}, + factory_{factory_type::build().on(mtx->get_executor())} + {} + + std::pair validate() const override + { + // validating AMD correctness is hard, let's leave it out for now + return {true, 0.0}; + } + + gko::size_type get_flops() const override { return 0; } + + gko::size_type get_memory() const override { return 0; } + + void prepare() override {} + + void run() override { reorder_ = factory_->generate(mtx_); } + +private: + std::shared_ptr mtx_; + std::unique_ptr factory_; + std::unique_ptr reorder_; +}; + + const std::map(const Mtx*)>> - operation_map{ - {"spgemm", - [](const Mtx* mtx) { return std::make_unique(mtx); }}, + operation_map +{ + {"spgemm", + [](const Mtx* mtx) { return std::make_unique(mtx); }}, {"spgeam", [](const Mtx* mtx) { return std::make_unique(mtx); }}, {"transpose", @@ -726,9 +826,25 @@ const std::map(mtx, false); }}, - {"symbolic_cholesky_symmetric", [](const Mtx* mtx) { + {"symbolic_cholesky_symmetric", + [](const Mtx* mtx) { return std::make_unique(mtx, true); - }}}; + }}, + {"reorder_rcm", + [](const Mtx* mtx) { + return std::make_unique(mtx); + }}, + {"reorder_amd", [](const Mtx* mtx) { + return std::make_unique(mtx); + }}, +#if GKO_HAVE_METIS + { + "reorder_nd", [](const Mtx* mtx) { + return std::make_unique(mtx); + } + } +#endif +}; std::unique_ptr get_operation(std::string name, diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index 4fb06d2a4a0..3b0ce26db5f 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -57,11 +57,17 @@ const auto benchmark_name = "sparse_blas"; using mat_data = gko::matrix_data; -DEFINE_string( - operations, "spgemm,spgeam,transpose", +const char* operations_string = "Comma-separated list of operations to be benchmarked. Can be " "spgemm, spgeam, transpose, sort, is_sorted, generate_lookup, " - "lookup, symbolic_lu, symbolic_cholesky, symbolic_cholesky_symmetric"); + "lookup, symbolic_lu, symbolic_cholesky, " + "symbolic_cholesky_symmetric, reorder_rcm, " +#if GKO_HAVE_METIS + "reorder_nd, " +#endif + "reorder_amd"; + +DEFINE_string(operations, "spgemm,spgeam,transpose", operations_string); DEFINE_bool(validate, false, "Check for correct sparsity pattern and compute the L2 norm " From 201ee956747aa5328dd4f524813b7721f4c60de0 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 23 Jun 2023 10:58:05 +0200 Subject: [PATCH 003/583] improve formatting Co-authored-by: Yuhsiang M. 
Tsai --- benchmark/sparse_blas/operations.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/benchmark/sparse_blas/operations.cpp b/benchmark/sparse_blas/operations.cpp index dc96143ed6d..66e5707c559 100644 --- a/benchmark/sparse_blas/operations.cpp +++ b/benchmark/sparse_blas/operations.cpp @@ -796,10 +796,9 @@ class ReorderApproxMinDegOperation : public BenchmarkOperation { const std::map(const Mtx*)>> - operation_map -{ - {"spgemm", - [](const Mtx* mtx) { return std::make_unique(mtx); }}, + operation_map{ + {"spgemm", + [](const Mtx* mtx) { return std::make_unique(mtx); }}, {"spgeam", [](const Mtx* mtx) { return std::make_unique(mtx); }}, {"transpose", @@ -834,17 +833,18 @@ const std::map(mtx); }}, - {"reorder_amd", [](const Mtx* mtx) { + {"reorder_amd", + [](const Mtx* mtx) { return std::make_unique(mtx); }}, + {"reorder_nd", + [](const Mtx* mtx) -> std::unique_ptr { #if GKO_HAVE_METIS - { - "reorder_nd", [](const Mtx* mtx) { - return std::make_unique(mtx); - } - } + return std::make_unique(mtx); +#else + GKO_NOT_COMPILED(METIS); #endif -}; + }}}; std::unique_ptr get_operation(std::string name, From 42c9d00fa9f5b07474c5331ccb10aefd39f57c27 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 5 Apr 2023 10:55:35 +0200 Subject: [PATCH 004/583] add SparsityCsr sorting kernels --- .../matrix/sparsity_csr_kernels.hpp.inc | 32 +++--- core/test/utils/unsort_matrix.hpp | 19 +--- cuda/matrix/sparsity_csr_kernels.cu | 65 ++++++++++++ dpcpp/matrix/sparsity_csr_kernels.dp.cpp | 45 ++++++++- hip/matrix/sparsity_csr_kernels.hip.cpp | 63 ++++++++++++ test/matrix/sparsity_csr_kernels.cpp | 98 +++++++++++++++++++ 6 files changed, 287 insertions(+), 35 deletions(-) diff --git a/common/cuda_hip/matrix/sparsity_csr_kernels.hpp.inc b/common/cuda_hip/matrix/sparsity_csr_kernels.hpp.inc index dddd7946a04..2d2ca9a5183 100644 --- a/common/cuda_hip/matrix/sparsity_csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/sparsity_csr_kernels.hpp.inc @@ -121,19 +121,19 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void sort_by_column_index(std::shared_ptr exec, - matrix::SparsityCsr* to_sort) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_SPARSITY_CSR_SORT_BY_COLUMN_INDEX); - - -template -void is_sorted_by_column_index( - std::shared_ptr exec, - const matrix::SparsityCsr* to_check, - bool* is_sorted) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_SPARSITY_CSR_IS_SORTED_BY_COLUMN_INDEX); +void fallback_sort(std::shared_ptr exec, + matrix::SparsityCsr* to_sort) +{ + const auto row_ptrs = to_sort->get_const_row_ptrs(); + const auto col_idxs = to_sort->get_col_idxs(); + const auto nnz = to_sort->get_num_nonzeros(); + const auto num_rows = to_sort->get_size()[0]; + array row_idx_array(exec, nnz); + const auto row_idxs = row_idx_array.get_data(); + components::convert_ptrs_to_idxs(exec, row_ptrs, num_rows, row_idxs); + // two sorts by integer keys hopefully enable Thrust to use cub's RadixSort + thrust::sort_by_key(thrust_policy(exec), col_idxs, col_idxs + nnz, + row_idxs); + thrust::stable_sort_by_key(thrust_policy(exec), row_idxs, row_idxs + nnz, + col_idxs); +} diff --git a/core/test/utils/unsort_matrix.hpp b/core/test/utils/unsort_matrix.hpp index 04ece71d346..1af40352bd2 100644 --- a/core/test/utils/unsort_matrix.hpp +++ b/core/test/utils/unsort_matrix.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include @@ -55,24 +56,10 @@ namespace test { template void unsort_matrix(MtxPtr&& mtx, RandomEngine&& engine) { - using value_type = gko::detail::pointeeget_values())>; - using index_type = gko::detail::pointeeget_col_idxs())>; - auto nnz = mtx->get_num_stored_elements(); - if (nnz <= 0) { - return; - } - + using value_type = typename gko::detail::pointee::value_type; + using index_type = typename gko::detail::pointee::index_type; const auto exec = mtx->get_executor(); const auto master = exec->get_master(); - - // If exec is not the master/host, extract the master and perform the - // unsorting there, followed by copying it back - if (exec != master) { - auto h_mtx = mtx->clone(master); - unsort_matrix(h_mtx, engine); - mtx->copy_from(h_mtx); - return; - } matrix_data data; mtx->write(data); auto& nonzeros = data.nonzeros; diff --git a/cuda/matrix/sparsity_csr_kernels.cu b/cuda/matrix/sparsity_csr_kernels.cu index 73e1fd9cb76..ab367c80b20 100644 --- a/cuda/matrix/sparsity_csr_kernels.cu +++ b/cuda/matrix/sparsity_csr_kernels.cu @@ -33,15 +33,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/sparsity_csr_kernels.hpp" +#include + + #include #include "accessor/cuda_helper.hpp" #include "accessor/reduced_row_major.hpp" #include "core/base/mixed_precision_types.hpp" +#include "core/components/fill_array_kernels.hpp" +#include "core/components/format_conversion_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" #include "cuda/base/config.hpp" +#include "cuda/base/cusparse_bindings.hpp" #include "cuda/base/math.hpp" +#include "cuda/base/thrust.cuh" #include "cuda/base/types.hpp" #include "cuda/components/cooperative_groups.cuh" #include "cuda/components/reduction.cuh" @@ -61,6 +68,7 @@ namespace sparsity_csr { constexpr int classical_oversubscription = 32; +constexpr int default_block_size = 512; constexpr int spmv_block_size = 128; constexpr int warps_in_block = 4; @@ -68,6 +76,7 @@ constexpr int warps_in_block = 4; using classical_kernels = syn::value_list; +#include "common/cuda_hip/matrix/csr_common.hpp.inc" #include "common/cuda_hip/matrix/sparsity_csr_kernels.hpp.inc" @@ -178,6 +187,62 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_ADVANCED_SPMV_KERNEL); +template +void sort_by_column_index(std::shared_ptr exec, + matrix::SparsityCsr* to_sort) +{ + const auto nnz = static_cast(to_sort->get_num_nonzeros()); + const auto num_rows = static_cast(to_sort->get_size()[0]); + const auto num_cols = static_cast(to_sort->get_size()[1]); + const auto row_ptrs = to_sort->get_const_row_ptrs(); + const auto col_idxs = to_sort->get_col_idxs(); + if (cusparse::is_supported::value) { + const auto handle = exec->get_cusparse_handle(); + auto descr = cusparse::create_mat_descr(); + array permutation_array(exec, to_sort->get_num_nonzeros()); + auto permutation = permutation_array.get_data(); + components::fill_seq_array(exec, permutation, + to_sort->get_num_nonzeros()); + size_type buffer_size{}; + cusparse::csrsort_buffer_size(handle, num_rows, num_cols, nnz, row_ptrs, + col_idxs, buffer_size); + array buffer_array{exec, buffer_size}; + auto buffer = buffer_array.get_data(); + cusparse::csrsort(handle, num_rows, num_cols, nnz, descr, row_ptrs, + col_idxs, permutation, buffer); + cusparse::destroy(descr); + } else { + fallback_sort(exec, to_sort); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_SORT_BY_COLUMN_INDEX); + + +template +void 
is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::SparsityCsr* to_check, bool* is_sorted) +{ + *is_sorted = true; + auto cpu_array = make_array_view(exec->get_master(), 1, is_sorted); + auto gpu_array = array{exec, cpu_array}; + const auto num_rows = static_cast(to_check->get_size()[0]); + auto num_blocks = ceildiv(num_rows, default_block_size); + if (num_blocks > 0) { + kernel::check_unsorted<<get_stream()>>>( + to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(), + num_rows, gpu_array.get_data()); + } + cpu_array = gpu_array; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_IS_SORTED_BY_COLUMN_INDEX); + + } // namespace sparsity_csr } // namespace cuda } // namespace kernels diff --git a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp index 2cebac00c5f..f12d15175b7 100644 --- a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp +++ b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp @@ -303,7 +303,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, matrix::SparsityCsr* to_sort) - GKO_NOT_IMPLEMENTED; +{ + const auto num_rows = to_sort->get_size()[0]; + const auto row_ptrs = to_sort->get_const_row_ptrs(); + const auto cols = to_sort->get_const_col_idxs(); + auto queue = exec->get_queue(); + // build sorted postorder node list for each row + queue->submit([&](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx_id) { + const auto row = idx_id[0]; + const auto row_begin = row_ptrs[row]; + const auto row_end = row_ptrs[row + 1]; + auto lower_end = row_begin; + // heap-sort the elements + std::make_heap(cols + row_begin, cols + lower_end); + std::sort_heap(cols + row_begin, cols + lower_end); + }); + }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_SORT_BY_COLUMN_INDEX); @@ -312,8 +329,30 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::SparsityCsr* to_check, - bool* is_sorted) GKO_NOT_IMPLEMENTED; + const matrix::SparsityCsr* to_check, bool* is_sorted) +{ + array is_sorted_device_array{exec, {true}}; + const auto num_rows = to_check->get_size()[0]; + const auto row_ptrs = to_check->get_const_row_ptrs(); + const auto cols = to_check->get_const_col_idxs(); + auto is_sorted_device = is_sorted_device_array.get_data(); + exec->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + const auto row = static_cast(idx[0]); + const auto begin = row_ptrs[row]; + const auto end = row_ptrs[row + 1]; + if (*is_sorted_device) { + for (auto i = begin; i < end - 1; i++) { + if (cols[i] > cols[i + 1]) { + *is_sorted_device = false; + break; + } + } + } + }); + }); + *is_sorted = exec->copy_val_to_host(is_sorted_device); +}; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_IS_SORTED_BY_COLUMN_INDEX); diff --git a/hip/matrix/sparsity_csr_kernels.hip.cpp b/hip/matrix/sparsity_csr_kernels.hip.cpp index bc9cd0a31db..2084aa5656f 100644 --- a/hip/matrix/sparsity_csr_kernels.hip.cpp +++ b/hip/matrix/sparsity_csr_kernels.hip.cpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -42,9 +43,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "accessor/hip_helper.hpp" #include "accessor/reduced_row_major.hpp" #include "core/base/mixed_precision_types.hpp" +#include "core/components/fill_array_kernels.hpp" +#include "core/components/format_conversion_kernels.hpp" #include "core/synthesizer/implementation_selection.hpp" #include "hip/base/config.hip.hpp" +#include "hip/base/hipsparse_bindings.hip.hpp" #include "hip/base/math.hip.hpp" +#include "hip/base/thrust.hip.hpp" #include "hip/base/types.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" #include "hip/components/reduction.hip.hpp" @@ -64,6 +69,7 @@ namespace sparsity_csr { constexpr int classical_oversubscription = 32; +constexpr int default_block_size = 512; constexpr int spmv_block_size = 256; constexpr int warps_in_block = 4; @@ -71,6 +77,7 @@ constexpr int warps_in_block = 4; using classical_kernels = syn::value_list; +#include "common/cuda_hip/matrix/csr_common.hpp.inc" #include "common/cuda_hip/matrix/sparsity_csr_kernels.hpp.inc" @@ -181,6 +188,62 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_ADVANCED_SPMV_KERNEL); +template +void sort_by_column_index(std::shared_ptr exec, + matrix::SparsityCsr* to_sort) +{ + const auto nnz = static_cast(to_sort->get_num_nonzeros()); + const auto num_rows = static_cast(to_sort->get_size()[0]); + const auto num_cols = static_cast(to_sort->get_size()[1]); + const auto row_ptrs = to_sort->get_const_row_ptrs(); + const auto col_idxs = to_sort->get_col_idxs(); + if (hipsparse::is_supported::value) { + const auto handle = exec->get_hipsparse_handle(); + auto descr = hipsparse::create_mat_descr(); + array permutation_array(exec, to_sort->get_num_nonzeros()); + auto permutation = permutation_array.get_data(); + components::fill_seq_array(exec, permutation, + to_sort->get_num_nonzeros()); + size_type buffer_size{}; + hipsparse::csrsort_buffer_size(handle, num_rows, num_cols, nnz, + row_ptrs, col_idxs, buffer_size); + array buffer_array{exec, buffer_size}; + auto buffer = buffer_array.get_data(); + hipsparse::csrsort(handle, num_rows, num_cols, nnz, descr, row_ptrs, + col_idxs, permutation, buffer); + hipsparse::destroy(descr); + } else { + fallback_sort(exec, to_sort); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_SORT_BY_COLUMN_INDEX); + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::SparsityCsr* to_check, bool* is_sorted) +{ + *is_sorted = true; + auto cpu_array = make_array_view(exec->get_master(), 1, is_sorted); + auto gpu_array = array{exec, cpu_array}; + const auto num_rows = static_cast(to_check->get_size()[0]); + auto num_blocks = ceildiv(num_rows, default_block_size); + if (num_blocks > 0) { + kernel::check_unsorted<<get_stream()>>>( + to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(), + num_rows, gpu_array.get_data()); + } + cpu_array = gpu_array; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SPARSITY_CSR_IS_SORTED_BY_COLUMN_INDEX); + + } // namespace sparsity_csr } // namespace hip } // namespace kernels diff --git a/test/matrix/sparsity_csr_kernels.cpp b/test/matrix/sparsity_csr_kernels.cpp index b137ce72ca8..af1e6ca1401 100644 --- a/test/matrix/sparsity_csr_kernels.cpp +++ b/test/matrix/sparsity_csr_kernels.cpp @@ -50,6 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/test/utils.hpp" #include "core/test/utils/assertions.hpp" #include "core/test/utils/matrix_generator.hpp" +#include "core/test/utils/unsort_matrix.hpp" #include "test/utils/executor.hpp" @@ -59,6 +60,7 @@ namespace { class SparsityCsr : public CommonTestFixture { protected: using Mtx = gko::matrix::SparsityCsr; + using Mtx64 = gko::matrix::SparsityCsr; SparsityCsr() : rng{9312} { @@ -145,4 +147,100 @@ TEST_F(SparsityCsr, ConvertToDenseIsEquivalentToRef) } +TEST_F(SparsityCsr, SortSortedMatrixIsEquivalentToRef) +{ + mtx->sort_by_column_index(); + dmtx->sort_by_column_index(); + + auto cols_view = + gko::make_array_view(ref, mtx->get_num_nonzeros(), mtx->get_col_idxs()); + auto dcols_view = gko::make_array_view(exec, dmtx->get_num_nonzeros(), + dmtx->get_col_idxs()); + GKO_ASSERT_ARRAY_EQ(cols_view, dcols_view); +} + + +TEST_F(SparsityCsr, SortSortedMatrix64IsEquivalentToRef) +{ + auto mtx64 = Mtx64::create(ref); + auto dmtx64 = Mtx64::create(exec); + gko::matrix_data data; + gko::matrix_data data64; + mtx->sort_by_column_index(); + mtx->write(data); + data64.size = data.size; + for (auto entry : data.nonzeros) { + data64.nonzeros.emplace_back(entry.row, entry.column, entry.value); + } + mtx64->read(data64); + dmtx64->read(data64); + + mtx64->sort_by_column_index(); + dmtx64->sort_by_column_index(); + + auto cols_view = gko::make_array_view(ref, mtx64->get_num_nonzeros(), + mtx64->get_col_idxs()); + auto dcols_view = gko::make_array_view(exec, dmtx64->get_num_nonzeros(), + dmtx64->get_col_idxs()); + GKO_ASSERT_ARRAY_EQ(cols_view, dcols_view); +} + + +TEST_F(SparsityCsr, SortUnsortedMatrixIsEquivalentToRef) +{ + gko::test::unsort_matrix(mtx, rng); + dmtx->copy_from(mtx); + + mtx->sort_by_column_index(); + dmtx->sort_by_column_index(); + + auto cols_view = + gko::make_array_view(ref, mtx->get_num_nonzeros(), mtx->get_col_idxs()); + auto dcols_view = gko::make_array_view(exec, dmtx->get_num_nonzeros(), + dmtx->get_col_idxs()); + GKO_ASSERT_ARRAY_EQ(cols_view, dcols_view); +} + + +TEST_F(SparsityCsr, SortUnsortedMatrix64IsEquivalentToRef) +{ + gko::test::unsort_matrix(mtx, rng); + auto mtx64 = Mtx64::create(ref); + auto dmtx64 = Mtx64::create(exec); + gko::matrix_data data; + gko::matrix_data data64; + mtx->sort_by_column_index(); + mtx->write(data); + data64.size = data.size; + for (auto entry : data.nonzeros) { + data64.nonzeros.emplace_back(entry.row, entry.column, entry.value); + } + mtx64->read(data64); + dmtx64->read(data64); + + mtx64->sort_by_column_index(); + dmtx64->sort_by_column_index(); + + auto cols_view = gko::make_array_view(ref, mtx64->get_num_nonzeros(), + mtx64->get_col_idxs()); + auto dcols_view = gko::make_array_view(exec, dmtx64->get_num_nonzeros(), + dmtx64->get_col_idxs()); + GKO_ASSERT_ARRAY_EQ(cols_view, dcols_view); +} + + +TEST_F(SparsityCsr, RecognizesUnsortedMatrix) +{ + gko::test::unsort_matrix(dmtx, rng); + + ASSERT_FALSE(dmtx->is_sorted_by_column_index()); +} + + +TEST_F(SparsityCsr, RecognizesSortedMatrix) +{ + ASSERT_TRUE(dmtx->is_sorted_by_column_index()); +} + + } // namespace From ce1e0f73b7e1af9a518555c5650cd044cd7c9d2d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 5 Apr 2023 15:23:58 +0200 Subject: [PATCH 005/583] fix dpcpp compilation issues --- dpcpp/matrix/sparsity_csr_kernels.dp.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp index f12d15175b7..6001e687dca 100644 --- a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp +++ 
b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp @@ -306,7 +306,7 @@ void sort_by_column_index(std::shared_ptr exec, { const auto num_rows = to_sort->get_size()[0]; const auto row_ptrs = to_sort->get_const_row_ptrs(); - const auto cols = to_sort->get_const_col_idxs(); + const auto cols = to_sort->get_col_idxs(); auto queue = exec->get_queue(); // build sorted postorder node list for each row queue->submit([&](sycl::handler& cgh) { @@ -314,10 +314,9 @@ void sort_by_column_index(std::shared_ptr exec, const auto row = idx_id[0]; const auto row_begin = row_ptrs[row]; const auto row_end = row_ptrs[row + 1]; - auto lower_end = row_begin; // heap-sort the elements - std::make_heap(cols + row_begin, cols + lower_end); - std::sort_heap(cols + row_begin, cols + lower_end); + std::make_heap(cols + row_begin, cols + row_end); + std::sort_heap(cols + row_begin, cols + row_end); }); }); } From 3af2e6caa3e8b4278163f2ea440a1e8c2a5d7007 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 5 Jun 2023 21:39:10 +0200 Subject: [PATCH 006/583] fix unsorted test --- test/matrix/sparsity_csr_kernels.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/matrix/sparsity_csr_kernels.cpp b/test/matrix/sparsity_csr_kernels.cpp index af1e6ca1401..d865570b6d0 100644 --- a/test/matrix/sparsity_csr_kernels.cpp +++ b/test/matrix/sparsity_csr_kernels.cpp @@ -209,7 +209,6 @@ TEST_F(SparsityCsr, SortUnsortedMatrix64IsEquivalentToRef) auto dmtx64 = Mtx64::create(exec); gko::matrix_data data; gko::matrix_data data64; - mtx->sort_by_column_index(); mtx->write(data); data64.size = data.size; for (auto entry : data.nonzeros) { From 6ef07edb9abb35da530198a6c298ed7963e342ba Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 7 Jun 2023 12:11:41 +0200 Subject: [PATCH 007/583] review updates Co-authored-by: Marcel Koch --- core/test/utils/unsort_matrix.hpp | 2 -- dpcpp/matrix/sparsity_csr_kernels.dp.cpp | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/core/test/utils/unsort_matrix.hpp b/core/test/utils/unsort_matrix.hpp index 1af40352bd2..e22d86b326e 100644 --- a/core/test/utils/unsort_matrix.hpp +++ b/core/test/utils/unsort_matrix.hpp @@ -58,8 +58,6 @@ void unsort_matrix(MtxPtr&& mtx, RandomEngine&& engine) { using value_type = typename gko::detail::pointee::value_type; using index_type = typename gko::detail::pointee::index_type; - const auto exec = mtx->get_executor(); - const auto master = exec->get_master(); matrix_data data; mtx->write(data); auto& nonzeros = data.nonzeros; diff --git a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp index 6001e687dca..133e5f41478 100644 --- a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp +++ b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp @@ -330,11 +330,12 @@ void is_sorted_by_column_index( std::shared_ptr exec, const matrix::SparsityCsr* to_check, bool* is_sorted) { - array is_sorted_device_array{exec, {true}}; + auto cpu_array = make_array_view(exec->get_master(), 1, is_sorted); + auto gpu_array = array{exec, cpu_array}; const auto num_rows = to_check->get_size()[0]; const auto row_ptrs = to_check->get_const_row_ptrs(); const auto cols = to_check->get_const_col_idxs(); - auto is_sorted_device = is_sorted_device_array.get_data(); + auto is_sorted_device = gpu_array.get_data(); exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto row = static_cast(idx[0]); @@ -350,7 +351,7 @@ void is_sorted_by_column_index( } }); }); - *is_sorted = 
exec->copy_val_to_host(is_sorted_device); + cpu_array = gpu_array; }; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( From 6267a56c25dba1e77d2ca43d231738398803d19b Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 20 Jun 2023 10:56:22 +0200 Subject: [PATCH 008/583] fix is_sorted initialization --- dpcpp/matrix/sparsity_csr_kernels.dp.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp index 133e5f41478..1acc16d7026 100644 --- a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp +++ b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp @@ -330,6 +330,7 @@ void is_sorted_by_column_index( std::shared_ptr exec, const matrix::SparsityCsr* to_check, bool* is_sorted) { + *is_sorted = true; auto cpu_array = make_array_view(exec->get_master(), 1, is_sorted); auto gpu_array = array{exec, cpu_array}; const auto num_rows = to_check->get_size()[0]; From 70c75b38d3a67223ee59c786351c3fa071fe8851 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 30 Jun 2023 09:45:04 +0200 Subject: [PATCH 009/583] disable ambiguous range constructor this should fix the bug reported by sonarcloud: https://sonarcloud.io/project/issues?open=AYTucKfs8qk247btl14g&id=ginkgo-project_ginkgo --- include/ginkgo/core/base/range.hpp | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/include/ginkgo/core/base/range.hpp b/include/ginkgo/core/base/range.hpp index c9713f33572..ed8901075bd 100644 --- a/include/ginkgo/core/base/range.hpp +++ b/include/ginkgo/core/base/range.hpp @@ -188,6 +188,27 @@ GKO_ATTRIBUTES constexpr GKO_INLINE equal_dimensions(first, second); } +/** + * Helper that stores the first type of a parameter pack, if its length is + * greater 0. + */ +template +struct head; + +/** + * @copydoc head + */ +template +struct head { + using type = First; +}; + +/** + * @copydoc head + */ +template +using head_t = typename head::type; + } // namespace detail @@ -327,7 +348,12 @@ class range { * * @param params parameters forwarded to Accessor constructor. */ - template + template < + typename... AccessorParams, + typename = std::enable_if_t< + sizeof...(AccessorParams) != 1 || + !std::is_same< + range, std::decay>>::value>> GKO_ATTRIBUTES constexpr explicit range(AccessorParams&&... 
params) : accessor_{std::forward(params)...} {} From 73bad7245b1af3f9ef4a6bd5c0abf3bff9477d7e Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 5 Jul 2023 10:03:08 +0200 Subject: [PATCH 010/583] Guard against spaces in GinkgoConfig.cmake.in --- cmake/GinkgoConfig.cmake.in | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index a2857310183..44aaf34fc3f 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -61,27 +61,27 @@ set(GINKGO_IWYU_PATH @GINKGO_IWYU_PATH@) set(GINKGO_JACOBI_FULL_OPTIMIZATIONS @GINKGO_JACOBI_FULL_OPTIMIZATIONS@) -set(GINKGO_CUDA_ARCHITECTURES @GINKGO_CUDA_ARCHITECTURES@) +set(GINKGO_CUDA_ARCHITECTURES "@GINKGO_CUDA_ARCHITECTURES@") set(GINKGO_CUDA_DEFAULT_HOST_COMPILER @GINKGO_CUDA_DEFAULT_HOST_COMPILER@) -set(GINKGO_CUDA_HOST_COMPILER @CMAKE_CUDA_HOST_COMPILER@) -set(GINKGO_CUDA_ARCH_FLAGS @GINKGO_CUDA_ARCH_FLAGS@) +set(GINKGO_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@") +set(GINKGO_CUDA_ARCH_FLAGS "@GINKGO_CUDA_ARCH_FLAGS@") -set(GINKGO_HIP_COMPILER_FLAGS @GINKGO_HIP_COMPILER_FLAGS@) -set(GINKGO_HIP_HCC_COMPILER_FLAGS @GINKGO_HIP_HCC_COMPILER_FLAGS@) -set(GINKGO_HIP_NVCC_COMPILER_FLAGS @GINKGO_HIP_NVCC_COMPILER_FLAGS@) -set(GINKGO_HIP_CLANG_COMPILER_FLAGS @GINKGO_HIP_CLANG_COMPILER_FLAGS@) +set(GINKGO_HIP_COMPILER_FLAGS "@GINKGO_HIP_COMPILER_FLAGS@") +set(GINKGO_HIP_HCC_COMPILER_FLAGS "@GINKGO_HIP_HCC_COMPILER_FLAGS@") +set(GINKGO_HIP_NVCC_COMPILER_FLAGS "@GINKGO_HIP_NVCC_COMPILER_FLAGS@") +set(GINKGO_HIP_CLANG_COMPILER_FLAGS "@GINKGO_HIP_CLANG_COMPILER_FLAGS@") set(GINKGO_HIP_PLATFORM @GINKGO_HIP_PLATFORM@) -set(GINKGO_HIP_PLATFORM_AMD_REGEX @HIP_PLATFORM_AMD_REGEX@) -set(GINKGO_HIP_PLATFORM_NVIDIA_REGEX @HIP_PLATFORM_NVIDIA_REGEX@) -set(GINKGO_HIP_AMDGPU @GINKGO_HIP_AMDGPU@) +set(GINKGO_HIP_PLATFORM_AMD_REGEX "@HIP_PLATFORM_AMD_REGEX@") +set(GINKGO_HIP_PLATFORM_NVIDIA_REGEX "@HIP_PLATFORM_NVIDIA_REGEX@") +set(GINKGO_HIP_AMDGPU "@GINKGO_HIP_AMDGPU@") set(GINKGO_HIP_VERSION @GINKGO_HIP_VERSION@) -set(GINKGO_AMD_ARCH_FLAGS @GINKGO_AMD_ARCH_FLAGS@) +set(GINKGO_AMD_ARCH_FLAGS "@GINKGO_AMD_ARCH_FLAGS@") set(GINKGO_DPCPP_VERSION @GINKGO_DPCPP_VERSION@) set(GINKGO_DPCPP_MAJOR_VERSION @GINKGO_DPCPP_MAJOR_VERSION@) -set(GINKGO_DPCPP_FLAGS @GINKGO_DPCPP_FLAGS@) -set(GINKGO_MKL_ROOT @GINKGO_MKL_ROOT@) -set(GINKGO_DPL_ROOT @GINKGO_DPL_ROOT@) +set(GINKGO_DPCPP_FLAGS "@GINKGO_DPCPP_FLAGS@") +set(GINKGO_MKL_ROOT "@GINKGO_MKL_ROOT@") +set(GINKGO_DPL_ROOT "@GINKGO_DPL_ROOT@") set(GINKGO_BUILD_MPI @GINKGO_BUILD_MPI@) @@ -117,9 +117,9 @@ set(GINKGO_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@") set(GINKGO_CUDA_COMPILER_VERSION @CMAKE_CUDA_COMPILER_VERSION@) set(GINKGO_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@") -set(GINKGO_CUBLAS_LIBRARIES @CUBLAS@) -set(GINKGO_CUSPARSE_LIBRARIES @CUSPARSE@) -set(GINKGO_CUDA_LIBRARIES @CUDA_RUNTIME_LIBS@) +set(GINKGO_CUBLAS_LIBRARIES "@CUBLAS@") +set(GINKGO_CUSPARSE_LIBRARIES "@CUSPARSE@") +set(GINKGO_CUDA_LIBRARIES "@CUDA_RUNTIME_LIBS@") set(GINKGO_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@") set(GINKGO_CUDA_FLAGS "@CMAKE_CUDA_FLAGS_MODIFY@") @@ -129,8 +129,8 @@ set(GINKGO_CUDA_FLAGS_RELEASE "@CMAKE_CUDA_FLAGS_RELEASE_MODIFY@") # OpenMP set(GINKGO_OPENMP_VERSION @OpenMP_CXX_VERSION@) -set(GINKGO_OPENMP_LIB_NAMES @OpenMP_CXX_LIB_NAMES@) -set(GINKGO_OPENMP_LIBRARIES @OpenMP_CXX_LIBRARIES@) +set(GINKGO_OPENMP_LIB_NAMES "@OpenMP_CXX_LIB_NAMES@") 
+set(GINKGO_OPENMP_LIBRARIES "@OpenMP_CXX_LIBRARIES@") set(GINKGO_OPENMP_FLAGS "@OpenMP_CXX_FLAGS@") From ab93e8c40e9c64b1dfe0a12ae7fe06962bcc94f1 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 6 Jul 2023 11:29:03 +0200 Subject: [PATCH 011/583] Remove CUDA_ARCH from GPU-enabled CI jobs These are all jobs running on amdci right now. We can specify the CUDA architecture from environment variables of the gitlab-runner, which makes the test runs a bit more flexible as well. --- .gitlab-ci.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f81e271288c..d15c25dc270 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -98,7 +98,6 @@ build/cuda92/nompi/gcc/all/release/shared: BUILD_CUDA: "ON" BUILD_HIP: "ON" BUILD_TYPE: "Release" - CUDA_ARCH: 61 # cuda 10.1 and friends # Build CUDA NVIDIA without omp @@ -169,7 +168,6 @@ build/cuda101/nompi/clang/all/release/static: # MPI_AS_ROOT: "ON" # BUILD_HIP: "OFF" # BUILD_TYPE: "Release" -# CUDA_ARCH: 61 #build/clang-cuda101/nompi/clang/cuda/debug/static: @@ -187,7 +185,6 @@ build/cuda101/nompi/clang/all/release/static: # BUILD_TYPE: "Debug" # FAST_TESTS: "ON" # BUILD_SHARED_LIBS: "OFF" -# CUDA_ARCH: 61 # cuda 10.2 and friends @@ -358,7 +355,6 @@ build/cuda114/nompi/gcc/cuda/debug/shared: CXX_FLAGS: "-Wno-error=maybe-uninitialized" # disable spurious unused argument warning EXTRA_CMAKE_FLAGS: "-DCMAKE_CUDA_FLAGS=-diag-suppress=177" - CUDA_ARCH: 61 # nvhpc and friends @@ -381,7 +377,6 @@ build/nvhpc233/cuda120/nompi/nvcpp/release/static: CXX_FLAGS: "--diag_suppress=useless_using_declaration,declared_but_not_referenced" # disable spurious unused argument warning EXTRA_CMAKE_FLAGS: "-DCMAKE_CUDA_FLAGS=-diag-suppress=177" - CUDA_ARCH: 61 build/nvhpc227/cuda117/nompi/nvcpp/debug/shared: extends: @@ -401,7 +396,6 @@ build/nvhpc227/cuda117/nompi/nvcpp/debug/shared: CXX_FLAGS: "--diag_suppress=useless_using_declaration,declared_but_not_referenced" # disable spurious unused argument warning EXTRA_CMAKE_FLAGS: "-DCMAKE_CUDA_FLAGS=-diag-suppress=177" - CUDA_ARCH: 61 # ROCm 4.5 and friends build/amd/nompi/gcc/rocm45/release/shared: From 193cbbf14946da39b5b60dd535805ef6f8e7ae4e Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 5 May 2023 13:26:18 +0200 Subject: [PATCH 012/583] enable ensemble builds - remove SYCL-specific headers from general builds - disable failing tests for rocFFT - disable DPC++ distributed tests --- include/ginkgo/core/base/math.hpp | 7 ------- test/matrix/fft_kernels.cpp | 6 ++++++ test/mpi/distributed/CMakeLists.txt | 4 ++-- test/mpi/distributed/preconditioner/CMakeLists.txt | 2 +- test/mpi/solver/CMakeLists.txt | 2 +- test/solver/idr_kernels.cpp | 5 +++++ 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/ginkgo/core/base/math.hpp b/include/ginkgo/core/base/math.hpp index 3a6152c55d4..70e4db5bb2d 100644 --- a/include/ginkgo/core/base/math.hpp +++ b/include/ginkgo/core/base/math.hpp @@ -47,13 +47,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -// Using SYCL_LANGUAGE_VERSION will lead the mismatch sycl namespace from 6.0.0 -// when using dpcpp compiler without dpcpp module -#if GINKGO_DPCPP_MAJOR_VERSION -#include -#endif - - namespace gko { diff --git a/test/matrix/fft_kernels.cpp b/test/matrix/fft_kernels.cpp index 59d2d2de68e..fd9dda821c0 100644 --- a/test/matrix/fft_kernels.cpp +++ b/test/matrix/fft_kernels.cpp @@ -138,6 +138,9 @@ TYPED_TEST(Fft, Apply1DIsEqualToReference) TYPED_TEST(Fft, ApplyStrided1DIsEqualToReference) { +#if defined(GKO_COMPILING_HIP) && GINKGO_HIP_PLATFORM_HCC + GTEST_SKIP() << "rocFFT has a bug related to strided 1D FFT"; +#endif using T = typename TestFixture::value_type; this->fft->apply(this->data_strided, this->out_strided); @@ -160,6 +163,9 @@ TYPED_TEST(Fft, Apply1DInverseIsEqualToReference) TYPED_TEST(Fft, ApplyStrided1DInverseIsEqualToReference) { +#if defined(GKO_COMPILING_HIP) && GINKGO_HIP_PLATFORM_HCC + GTEST_SKIP() << "rocFFT has a bug related to strided 1D FFT"; +#endif using T = typename TestFixture::value_type; this->ifft->apply(this->data_strided, this->out_strided); diff --git a/test/mpi/distributed/CMakeLists.txt b/test/mpi/distributed/CMakeLists.txt index a92e0ef4f70..b2368777589 100644 --- a/test/mpi/distributed/CMakeLists.txt +++ b/test/mpi/distributed/CMakeLists.txt @@ -1,4 +1,4 @@ -ginkgo_create_common_and_reference_test(matrix MPI_SIZE 3) -ginkgo_create_common_and_reference_test(vector MPI_SIZE 3) +ginkgo_create_common_and_reference_test(matrix MPI_SIZE 3 DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_and_reference_test(vector MPI_SIZE 3 DISABLE_EXECUTORS dpcpp) add_subdirectory(preconditioner) diff --git a/test/mpi/distributed/preconditioner/CMakeLists.txt b/test/mpi/distributed/preconditioner/CMakeLists.txt index 681bbec3bc9..4f734d21df8 100644 --- a/test/mpi/distributed/preconditioner/CMakeLists.txt +++ b/test/mpi/distributed/preconditioner/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_common_and_reference_test(schwarz MPI_SIZE 3) +ginkgo_create_common_and_reference_test(schwarz MPI_SIZE 3 DISABLE_EXECUTORS dpcpp) diff --git a/test/mpi/solver/CMakeLists.txt b/test/mpi/solver/CMakeLists.txt index 43a2d870d3f..bffd7b5ab10 100644 --- a/test/mpi/solver/CMakeLists.txt +++ b/test/mpi/solver/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_common_and_reference_test(solver MPI_SIZE 3) +ginkgo_create_common_and_reference_test(solver MPI_SIZE 3 DISABLE_EXECUTORS dpcpp) diff --git a/test/solver/idr_kernels.cpp b/test/solver/idr_kernels.cpp index f7191483615..959c857cb71 100644 --- a/test/solver/idr_kernels.cpp +++ b/test/solver/idr_kernels.cpp @@ -40,6 +40,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#ifdef GKO_COMPILING_DPCPP +#include +#endif + + #include #include #include From d954f6d3a56990859dcd0a26756f8673ee2f2804 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 5 May 2023 13:26:50 +0200 Subject: [PATCH 013/583] uniform distributed test folder structure --- test/mpi/CMakeLists.txt | 1 - test/mpi/distributed/CMakeLists.txt | 1 + test/mpi/{ => distributed}/solver/CMakeLists.txt | 0 test/mpi/{ => distributed}/solver/solver.cpp | 0 4 files changed, 1 insertion(+), 1 deletion(-) rename test/mpi/{ => distributed}/solver/CMakeLists.txt (100%) rename test/mpi/{ => distributed}/solver/solver.cpp (100%) diff --git a/test/mpi/CMakeLists.txt b/test/mpi/CMakeLists.txt index f715ea482ec..9066de66970 100644 --- a/test/mpi/CMakeLists.txt +++ b/test/mpi/CMakeLists.txt @@ -1,2 +1 @@ add_subdirectory(distributed) -add_subdirectory(solver) diff --git a/test/mpi/distributed/CMakeLists.txt b/test/mpi/distributed/CMakeLists.txt index b2368777589..b02a57b9983 100644 --- a/test/mpi/distributed/CMakeLists.txt +++ b/test/mpi/distributed/CMakeLists.txt @@ -2,3 +2,4 @@ ginkgo_create_common_and_reference_test(matrix MPI_SIZE 3 DISABLE_EXECUTORS dpcp ginkgo_create_common_and_reference_test(vector MPI_SIZE 3 DISABLE_EXECUTORS dpcpp) add_subdirectory(preconditioner) +add_subdirectory(solver) diff --git a/test/mpi/solver/CMakeLists.txt b/test/mpi/distributed/solver/CMakeLists.txt similarity index 100% rename from test/mpi/solver/CMakeLists.txt rename to test/mpi/distributed/solver/CMakeLists.txt diff --git a/test/mpi/solver/solver.cpp b/test/mpi/distributed/solver/solver.cpp similarity index 100% rename from test/mpi/solver/solver.cpp rename to test/mpi/distributed/solver/solver.cpp From 0b49cd3743af03b509cfc6c78c661b2a7f7ecf0c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 5 Jul 2023 11:05:01 +0200 Subject: [PATCH 014/583] reenable distributed vector test for dpcpp --- test/mpi/distributed/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mpi/distributed/CMakeLists.txt b/test/mpi/distributed/CMakeLists.txt index b02a57b9983..3d5e3cadd58 100644 --- a/test/mpi/distributed/CMakeLists.txt +++ b/test/mpi/distributed/CMakeLists.txt @@ -1,5 +1,5 @@ ginkgo_create_common_and_reference_test(matrix MPI_SIZE 3 DISABLE_EXECUTORS dpcpp) -ginkgo_create_common_and_reference_test(vector MPI_SIZE 3 DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_and_reference_test(vector MPI_SIZE 3) add_subdirectory(preconditioner) add_subdirectory(solver) From b9f80134d934b650820f3c09209e7360d192d418 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 6 Jul 2023 15:12:54 +0200 Subject: [PATCH 015/583] move test/mpi/distributed to test/mpi --- test/mpi/CMakeLists.txt | 6 +++++- test/mpi/distributed/CMakeLists.txt | 5 ----- test/mpi/{distributed => }/matrix.cpp | 0 test/mpi/{distributed => }/preconditioner/CMakeLists.txt | 0 test/mpi/{distributed => }/preconditioner/schwarz.cpp | 0 test/mpi/{distributed => }/solver/CMakeLists.txt | 0 test/mpi/{distributed => }/solver/solver.cpp | 0 test/mpi/{distributed => }/vector.cpp | 0 8 files changed, 5 insertions(+), 6 deletions(-) delete mode 100644 test/mpi/distributed/CMakeLists.txt rename test/mpi/{distributed => }/matrix.cpp (100%) rename test/mpi/{distributed => }/preconditioner/CMakeLists.txt (100%) rename test/mpi/{distributed => }/preconditioner/schwarz.cpp (100%) rename test/mpi/{distributed => }/solver/CMakeLists.txt (100%) rename test/mpi/{distributed => }/solver/solver.cpp (100%) rename test/mpi/{distributed => }/vector.cpp 
(100%) diff --git a/test/mpi/CMakeLists.txt b/test/mpi/CMakeLists.txt index 9066de66970..3d5e3cadd58 100644 --- a/test/mpi/CMakeLists.txt +++ b/test/mpi/CMakeLists.txt @@ -1 +1,5 @@ -add_subdirectory(distributed) +ginkgo_create_common_and_reference_test(matrix MPI_SIZE 3 DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_and_reference_test(vector MPI_SIZE 3) + +add_subdirectory(preconditioner) +add_subdirectory(solver) diff --git a/test/mpi/distributed/CMakeLists.txt b/test/mpi/distributed/CMakeLists.txt deleted file mode 100644 index 3d5e3cadd58..00000000000 --- a/test/mpi/distributed/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -ginkgo_create_common_and_reference_test(matrix MPI_SIZE 3 DISABLE_EXECUTORS dpcpp) -ginkgo_create_common_and_reference_test(vector MPI_SIZE 3) - -add_subdirectory(preconditioner) -add_subdirectory(solver) diff --git a/test/mpi/distributed/matrix.cpp b/test/mpi/matrix.cpp similarity index 100% rename from test/mpi/distributed/matrix.cpp rename to test/mpi/matrix.cpp diff --git a/test/mpi/distributed/preconditioner/CMakeLists.txt b/test/mpi/preconditioner/CMakeLists.txt similarity index 100% rename from test/mpi/distributed/preconditioner/CMakeLists.txt rename to test/mpi/preconditioner/CMakeLists.txt diff --git a/test/mpi/distributed/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp similarity index 100% rename from test/mpi/distributed/preconditioner/schwarz.cpp rename to test/mpi/preconditioner/schwarz.cpp diff --git a/test/mpi/distributed/solver/CMakeLists.txt b/test/mpi/solver/CMakeLists.txt similarity index 100% rename from test/mpi/distributed/solver/CMakeLists.txt rename to test/mpi/solver/CMakeLists.txt diff --git a/test/mpi/distributed/solver/solver.cpp b/test/mpi/solver/solver.cpp similarity index 100% rename from test/mpi/distributed/solver/solver.cpp rename to test/mpi/solver/solver.cpp diff --git a/test/mpi/distributed/vector.cpp b/test/mpi/vector.cpp similarity index 100% rename from test/mpi/distributed/vector.cpp rename to test/mpi/vector.cpp From a07875294f8ce1edf35667c733ee807ad57286a6 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 10 Jul 2023 14:50:06 +0200 Subject: [PATCH 016/583] adds interruptible to gitlab CI --- .gitlab-ci.yml | 7 +++++++ .gitlab/add-interrupt.yml | 2 ++ 2 files changed, 9 insertions(+) create mode 100644 .gitlab/add-interrupt.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d15c25dc270..b9385ebb3cb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,6 +19,13 @@ include: - local: '.gitlab/rules.yml' - local: '.gitlab/scripts.yml' - local: '.gitlab/variables.yml' + # This is a workaround to conditionally make the branch pipelines + # interruptible, because the flag does not directly support rules [1]. 
+ # + # [1] https://gitlab.com/gitlab-org/gitlab/-/issues/194023#note_1225906002 + - local: '.gitlab/add-interrupt.yml' + rules: + - if: $CI_COMMIT_BRANCH != "master" && $CI_COMMIT_BRANCH != "develop" sync: stage: sync diff --git a/.gitlab/add-interrupt.yml b/.gitlab/add-interrupt.yml new file mode 100644 index 00000000000..cf6fd95fe1e --- /dev/null +++ b/.gitlab/add-interrupt.yml @@ -0,0 +1,2 @@ +default: + interruptible: true From ba2d3d055c192d7037639cc9890616228889c8fc Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 10 Jul 2023 15:42:38 +0200 Subject: [PATCH 017/583] don't interrupt gh-pages or new-issue --- .gitlab-ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b9385ebb3cb..d6124211222 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -819,6 +819,7 @@ sonarqube_cov: # Deploy documentation to github-pages gh-pages: stage: deploy + interruptible: false extends: - .default_variables - .deploy_condition @@ -922,6 +923,7 @@ cudamemcheck: new-issue-on-failure: stage: on-failure + interruptible: false extends: - .default_variables - .use_status-job-settings From 032711ec88156ba231db7016a69a0a41334e0191 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 10 Jul 2023 15:52:33 +0200 Subject: [PATCH 018/583] no interrupt on tag --- .gitlab-ci.yml | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d6124211222..d43040620bb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -25,7 +25,7 @@ include: # [1] https://gitlab.com/gitlab-org/gitlab/-/issues/194023#note_1225906002 - local: '.gitlab/add-interrupt.yml' rules: - - if: $CI_COMMIT_BRANCH != "master" && $CI_COMMIT_BRANCH != "develop" + - if: $CI_COMMIT_BRANCH != "master" && $CI_COMMIT_BRANCH != "develop" && CI_COMMIT_TAG !~ /^v\d+\.\d+\.\d+/ sync: stage: sync @@ -779,15 +779,15 @@ sonarqube_cov_: - PR_ID=$(curl -s "https://api.github.com/search/issues?q=sha:${CI_COMMIT_SHA}" | jq '.items[0].number') - if [[ "${PR_ID}" != "null" ]]; then - target_branch=$(curl -s - "https://api.github.com/repos/ginkgo-project/ginkgo/pulls/${PR_ID}" | jq - '.base.ref' | sed 's/"//g'); - sonar_branching="-Dsonar.pullrequest.branch=${CI_COMMIT_REF_NAME} - -Dsonar.pullrequest.base=${target_branch} - -Dsonar.pullrequest.key=${PR_ID}"; + target_branch=$(curl -s + "https://api.github.com/repos/ginkgo-project/ginkgo/pulls/${PR_ID}" | jq + '.base.ref' | sed 's/"//g'); + sonar_branching="-Dsonar.pullrequest.branch=${CI_COMMIT_REF_NAME} + -Dsonar.pullrequest.base=${target_branch} + -Dsonar.pullrequest.key=${PR_ID}"; else - sonar_branching="-Dsonar.branch.name=${CI_COMMIT_REF_NAME} - -Dsonar.branch.target=develop"; + sonar_branching="-Dsonar.branch.name=${CI_COMMIT_REF_NAME} + -Dsonar.branch.target=develop"; fi - ctest -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=COVERAGE -DGINKGO_SONARQUBE_TEST=ON @@ -831,13 +831,13 @@ gh-pages: # build docs - mkdir -p ${CI_JOB_NAME} && pushd ${CI_JOB_NAME} - cmake ${CI_PROJECT_DIR} - -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} - -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} - -DBUILD_SHARED_LIBS=ON ${EXTRA_CMAKE_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF - -DGINKGO_BUILD_REFERENCE=OFF -DGINKGO_BUILD_OMP=OFF -DGINKGO_BUILD_CUDA=OFF - -DGINKGO_BUILD_HIP=OFF -DGINKGO_BUILD_DPCPP=OFF -DGINKGO_BUILD_MPI=OFF - -DGINKGO_BUILD_TESTS=OFF -DGINKGO_BUILD_EXAMPLES=OFF - -DGINKGO_BUILD_DOC=ON -DGINKGO_DOC_GENERATE_PDF=ON + 
-DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} + -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} + -DBUILD_SHARED_LIBS=ON ${EXTRA_CMAKE_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF + -DGINKGO_BUILD_REFERENCE=OFF -DGINKGO_BUILD_OMP=OFF -DGINKGO_BUILD_CUDA=OFF + -DGINKGO_BUILD_HIP=OFF -DGINKGO_BUILD_DPCPP=OFF -DGINKGO_BUILD_MPI=OFF + -DGINKGO_BUILD_TESTS=OFF -DGINKGO_BUILD_EXAMPLES=OFF + -DGINKGO_BUILD_DOC=ON -DGINKGO_DOC_GENERATE_PDF=ON - make usr - make pdf - popd @@ -854,7 +854,7 @@ gh-pages: - git diff --quiet HEAD || (git commit -m "Update documentation from ginkgo-project/ginkgo@${CURRENT_SHA}" && git push) dependencies: null - needs: [] + needs: [ ] threadsanitizer: @@ -867,10 +867,10 @@ threadsanitizer: script: - LD_PRELOAD=/usr/local/lib/libomp.so CC=clang CXX=clang++ - ctest -V -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=TSAN - -DCTEST_MEMORYCHECK_TYPE=ThreadSanitizer - -DCTEST_MEMORYCHECK_SANITIZER_OPTIONS=ignore_noninstrumented_modules=1 - --timeout 6000 + ctest -V -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=TSAN + -DCTEST_MEMORYCHECK_TYPE=ThreadSanitizer + -DCTEST_MEMORYCHECK_SANITIZER_OPTIONS=ignore_noninstrumented_modules=1 + --timeout 6000 leaksanitizer: stage: QoS_tools @@ -933,7 +933,7 @@ new-issue-on-failure: refs: - develop - master - dependencies: [] + dependencies: [ ] ## Benchmark SpMV From 564e5e480aab680b7f3cf591b7000a402ffbcdb1 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 10 Jul 2023 16:01:31 +0200 Subject: [PATCH 019/583] undo formatting --- .gitlab-ci.yml | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d43040620bb..9d374d81eef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -779,15 +779,15 @@ sonarqube_cov_: - PR_ID=$(curl -s "https://api.github.com/search/issues?q=sha:${CI_COMMIT_SHA}" | jq '.items[0].number') - if [[ "${PR_ID}" != "null" ]]; then - target_branch=$(curl -s - "https://api.github.com/repos/ginkgo-project/ginkgo/pulls/${PR_ID}" | jq - '.base.ref' | sed 's/"//g'); - sonar_branching="-Dsonar.pullrequest.branch=${CI_COMMIT_REF_NAME} - -Dsonar.pullrequest.base=${target_branch} - -Dsonar.pullrequest.key=${PR_ID}"; + target_branch=$(curl -s + "https://api.github.com/repos/ginkgo-project/ginkgo/pulls/${PR_ID}" | jq + '.base.ref' | sed 's/"//g'); + sonar_branching="-Dsonar.pullrequest.branch=${CI_COMMIT_REF_NAME} + -Dsonar.pullrequest.base=${target_branch} + -Dsonar.pullrequest.key=${PR_ID}"; else - sonar_branching="-Dsonar.branch.name=${CI_COMMIT_REF_NAME} - -Dsonar.branch.target=develop"; + sonar_branching="-Dsonar.branch.name=${CI_COMMIT_REF_NAME} + -Dsonar.branch.target=develop"; fi - ctest -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=COVERAGE -DGINKGO_SONARQUBE_TEST=ON @@ -831,13 +831,13 @@ gh-pages: # build docs - mkdir -p ${CI_JOB_NAME} && pushd ${CI_JOB_NAME} - cmake ${CI_PROJECT_DIR} - -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} - -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} - -DBUILD_SHARED_LIBS=ON ${EXTRA_CMAKE_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF - -DGINKGO_BUILD_REFERENCE=OFF -DGINKGO_BUILD_OMP=OFF -DGINKGO_BUILD_CUDA=OFF - -DGINKGO_BUILD_HIP=OFF -DGINKGO_BUILD_DPCPP=OFF -DGINKGO_BUILD_MPI=OFF - -DGINKGO_BUILD_TESTS=OFF -DGINKGO_BUILD_EXAMPLES=OFF - -DGINKGO_BUILD_DOC=ON -DGINKGO_DOC_GENERATE_PDF=ON + -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} + -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} 
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} + -DBUILD_SHARED_LIBS=ON ${EXTRA_CMAKE_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF + -DGINKGO_BUILD_REFERENCE=OFF -DGINKGO_BUILD_OMP=OFF -DGINKGO_BUILD_CUDA=OFF + -DGINKGO_BUILD_HIP=OFF -DGINKGO_BUILD_DPCPP=OFF -DGINKGO_BUILD_MPI=OFF + -DGINKGO_BUILD_TESTS=OFF -DGINKGO_BUILD_EXAMPLES=OFF + -DGINKGO_BUILD_DOC=ON -DGINKGO_DOC_GENERATE_PDF=ON - make usr - make pdf - popd @@ -854,7 +854,7 @@ gh-pages: - git diff --quiet HEAD || (git commit -m "Update documentation from ginkgo-project/ginkgo@${CURRENT_SHA}" && git push) dependencies: null - needs: [ ] + needs: [] threadsanitizer: @@ -867,10 +867,10 @@ threadsanitizer: script: - LD_PRELOAD=/usr/local/lib/libomp.so CC=clang CXX=clang++ - ctest -V -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=TSAN - -DCTEST_MEMORYCHECK_TYPE=ThreadSanitizer - -DCTEST_MEMORYCHECK_SANITIZER_OPTIONS=ignore_noninstrumented_modules=1 - --timeout 6000 + ctest -V -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=TSAN + -DCTEST_MEMORYCHECK_TYPE=ThreadSanitizer + -DCTEST_MEMORYCHECK_SANITIZER_OPTIONS=ignore_noninstrumented_modules=1 + --timeout 6000 leaksanitizer: stage: QoS_tools @@ -933,7 +933,7 @@ new-issue-on-failure: refs: - develop - master - dependencies: [ ] + dependencies: [] ## Benchmark SpMV From 986acd9ac682fcdea22b389bcf188c85a6bd2ad6 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 20 Jan 2023 12:48:36 +0100 Subject: [PATCH 020/583] adds test for reduction on zero size inputs --- test/base/kernel_launch_generic.cpp | 173 +++++++++++++++------------- 1 file changed, 95 insertions(+), 78 deletions(-) diff --git a/test/base/kernel_launch_generic.cpp b/test/base/kernel_launch_generic.cpp index a90a5ea6c70..4e57904a9d2 100644 --- a/test/base/kernel_launch_generic.cpp +++ b/test/base/kernel_launch_generic.cpp @@ -321,44 +321,45 @@ TEST_F(KernelLaunch, Runs2DDense) void run1d_reduction(std::shared_ptr exec) { - gko::array output{exec, 1}; + gko::array output{exec, {-1l}}; + auto run_reduction = [&](int64 init, size_type size) { + gko::kernels::EXEC_NAMESPACE::run_kernel_reduction( + exec, + [] GKO_KERNEL(auto i, auto a, auto dummy) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "value"); + static_assert(is_same::value, "dummy"); + return i + 1; + }, + [] GKO_KERNEL(auto i, auto j) { return i + j; }, + [] GKO_KERNEL(auto j) { return j * 2; }, init, output.get_data(), + size, output, move_only_val); + }; - gko::kernels::EXEC_NAMESPACE::run_kernel_reduction( - exec, - [] GKO_KERNEL(auto i, auto a, auto dummy) { - static_assert(is_same::value, "index"); - static_assert(is_same::value, "value"); - static_assert(is_same::value, "dummy"); - return i + 1; - }, - [] GKO_KERNEL(auto i, auto j) { return i + j; }, - [] GKO_KERNEL(auto j) { return j * 2; }, int64{}, output.get_data(), - size_type{100000}, output, move_only_val); + { + SCOPED_TRACE("Size 0"); + run_reduction(int64{1}, size_type{0}); - // 2 * sum i=0...99999 (i+1) - ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), 10000100000LL); + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), int64{1}); + } - gko::kernels::EXEC_NAMESPACE::run_kernel_reduction( - exec, - [] GKO_KERNEL(auto i, auto a, auto dummy) { - static_assert(is_same::value, "index"); - static_assert(is_same::value, "value"); - static_assert(is_same::value, "dummy"); - return i + 1; - }, - [] GKO_KERNEL(auto i, auto j) { - static_assert(is_same::value, "a"); - static_assert(is_same::value, "b"); - return i + j; - }, - [] GKO_KERNEL(auto j) { - 
static_assert(is_same::value, "value"); - return j * 2; - }, - int64{}, output.get_data(), size_type{100}, output, move_only_val); + { + SCOPED_TRACE("Size 100000"); + run_reduction(int64{0}, size_type{100000}); - // 2 * sum i=0...99 (i+1) - ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), 10100LL); + // 2 * sum i=0...99999 (i+1) + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), + int64{10000100000}); + } + + { + SCOPED_TRACE("Size 100"); + run_reduction(int64{0}, size_type{100}); + + // 2 * sum i=0...99 (i+1) + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), + int64{10100}); + } } TEST_F(KernelLaunch, Reduction1D) { run1d_reduction(exec); } @@ -366,54 +367,70 @@ TEST_F(KernelLaunch, Reduction1D) { run1d_reduction(exec); } void run2d_reduction(std::shared_ptr exec) { - gko::array output{exec, 1}; + gko::array output{exec, {-1l}}; + auto run_reduction = [&](int64 init, gko::dim<2> size) { + gko::kernels::EXEC_NAMESPACE::run_kernel_reduction( + exec, + [] GKO_KERNEL(auto i, auto j, auto a, auto dummy) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "index"); + static_assert(is_same::value, "value"); + static_assert(is_same::value, "dummy"); + return (i + 1) * (j + 1); + }, + [] GKO_KERNEL(auto i, auto j) { + static_assert(is_same::value, "a"); + static_assert(is_same::value, "b"); + return i + j; + }, + [] GKO_KERNEL(auto j) { + static_assert(is_same::value, "value"); + return j * 4; + }, + init, output.get_data(), size, output, move_only_val); + }; - gko::kernels::EXEC_NAMESPACE::run_kernel_reduction( - exec, - [] GKO_KERNEL(auto i, auto j, auto a, auto dummy) { - static_assert(is_same::value, "index"); - static_assert(is_same::value, "index"); - static_assert(is_same::value, "value"); - static_assert(is_same::value, "dummy"); - return (i + 1) * (j + 1); - }, - [] GKO_KERNEL(auto i, auto j) { - static_assert(is_same::value, "a"); - static_assert(is_same::value, "b"); - return i + j; - }, - [] GKO_KERNEL(auto j) { - static_assert(is_same::value, "value"); - return j * 4; - }, - int64{}, output.get_data(), gko::dim<2>{1000, 100}, output, - move_only_val); + { + SCOPED_TRACE("Dim 0x0"); + run_reduction(int64{0}, gko::dim<2>{0, 0}); - // 4 * sum i=0...999 sum j=0...99 of (i+1)*(j+1) - ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), 10110100000LL); + // 4 * sum i=0...999 sum j=0...99 of (i+1)*(j+1) + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), int64{0}); + } - gko::kernels::EXEC_NAMESPACE::run_kernel_reduction( - exec, - [] GKO_KERNEL(auto i, auto j, auto a, auto dummy) { - static_assert(is_same::value, "index"); - static_assert(is_same::value, "index"); - static_assert(is_same::value, "value"); - static_assert(is_same::value, "dummy"); - return (i + 1) * (j + 1); - }, - [] GKO_KERNEL(auto i, auto j) { - static_assert(is_same::value, "a"); - static_assert(is_same::value, "b"); - return i + j; - }, - [] GKO_KERNEL(auto j) { - static_assert(is_same::value, "value"); - return j * 4; - }, - int64{}, output.get_data(), gko::dim<2>{10, 10}, output, move_only_val); + { + SCOPED_TRACE("Dim 0x10"); + run_reduction(int64{0}, gko::dim<2>{0, 10}); + + // 4 * sum i=0...999 sum j=0...99 of (i+1)*(j+1) + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), int64{0}); + } + + { + SCOPED_TRACE("Dim 10x0"); + run_reduction(int64{0}, gko::dim<2>{10, 0}); + + // 4 * sum i=0...999 sum j=0...99 of (i+1)*(j+1) + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), int64{0}); + } + + { + SCOPED_TRACE("Dim 1000x100"); + 
run_reduction(int64{0}, gko::dim<2>{1000, 100}); + + // 4 * sum i=0...999 sum j=0...99 of (i+1)*(j+1) + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), + int64{10110100000}); + } - // 4 * sum i=0...9 sum j=0...9 of (i+1)*(j+1) - ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), 12100LL); + { + SCOPED_TRACE("Dim 10x10"); + run_reduction(int64{0}, gko::dim<2>{10, 10}); + + // 4 * sum i=0...9 sum j=0...9 of (i+1)*(j+1) + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), + int64{12100}); + } } TEST_F(KernelLaunch, Reduction2D) { run2d_reduction(exec); } From acab03e7910f81e7c260afebbe6eb9471a4bfc76 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 6 Jul 2023 12:07:52 +0200 Subject: [PATCH 021/583] fix dpcpp reduction for size=0 inputs --- dpcpp/base/kernel_launch_reduction.dp.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dpcpp/base/kernel_launch_reduction.dp.hpp b/dpcpp/base/kernel_launch_reduction.dp.hpp index 6cae0c72dcb..1cf7c1f774a 100644 --- a/dpcpp/base/kernel_launch_reduction.dp.hpp +++ b/dpcpp/base/kernel_launch_reduction.dp.hpp @@ -194,8 +194,8 @@ void run_kernel_reduction_impl(std::shared_ptr exec, } else { queue->submit([&](sycl::handler& cgh) { generic_kernel_reduction_1d( - cgh, static_cast(size), num_workgroups, fn, op, finalize, - identity, result, args...); + cgh, static_cast(size), 1, fn, op, finalize, identity, + result, args...); }); } } @@ -240,9 +240,9 @@ void run_kernel_reduction_impl(std::shared_ptr exec, }); } else { queue->submit([&](sycl::handler& cgh) { - generic_kernel_reduction_2d( - cgh, rows, cols, num_workgroups, fn, op, finalize, identity, - result, args...); + generic_kernel_reduction_2d(cgh, rows, cols, 1, fn, + op, finalize, identity, + result, args...); }); } } From 95f3cb87ea5504c8abd9a54dde5c9a74f2e162a2 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 7 Jul 2023 13:32:22 +0200 Subject: [PATCH 022/583] review updates: - use correct identity value - remove incorrect comments Co-authored-by: Tobias Ribizel --- test/base/kernel_launch_generic.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/base/kernel_launch_generic.cpp b/test/base/kernel_launch_generic.cpp index 4e57904a9d2..3dd1570c5f8 100644 --- a/test/base/kernel_launch_generic.cpp +++ b/test/base/kernel_launch_generic.cpp @@ -338,16 +338,15 @@ void run1d_reduction(std::shared_ptr exec) { SCOPED_TRACE("Size 0"); - run_reduction(int64{1}, size_type{0}); + run_reduction(int64{0}, size_type{0}); - ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), int64{1}); + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), int64{0}); } { SCOPED_TRACE("Size 100000"); run_reduction(int64{0}, size_type{100000}); - // 2 * sum i=0...99999 (i+1) ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), int64{10000100000}); } @@ -394,7 +393,6 @@ void run2d_reduction(std::shared_ptr exec) SCOPED_TRACE("Dim 0x0"); run_reduction(int64{0}, gko::dim<2>{0, 0}); - // 4 * sum i=0...999 sum j=0...99 of (i+1)*(j+1) ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), int64{0}); } @@ -402,7 +400,6 @@ void run2d_reduction(std::shared_ptr exec) SCOPED_TRACE("Dim 0x10"); run_reduction(int64{0}, gko::dim<2>{0, 10}); - // 4 * sum i=0...999 sum j=0...99 of (i+1)*(j+1) ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), int64{0}); } @@ -410,7 +407,6 @@ void run2d_reduction(std::shared_ptr exec) SCOPED_TRACE("Dim 10x0"); run_reduction(int64{0}, gko::dim<2>{10, 0}); - // 4 * sum i=0...999 sum j=0...99 of 
(i+1)*(j+1) ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), int64{0}); } From 8c3076aa4805394339960476282b8c453561fd3b Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 15 Apr 2023 17:08:57 +0200 Subject: [PATCH 023/583] make output more easily parseable --- benchmark/blas/blas_common.hpp | 5 +++-- benchmark/conversions/conversions.cpp | 5 +++-- benchmark/matrix_statistics/matrix_statistics.cpp | 2 +- benchmark/preconditioner/preconditioner.cpp | 5 +++-- benchmark/solver/solver_common.hpp | 5 +++-- benchmark/sparse_blas/sparse_blas.cpp | 5 +++-- benchmark/spmv/spmv_common.hpp | 5 +++-- 7 files changed, 19 insertions(+), 13 deletions(-) diff --git a/benchmark/blas/blas_common.hpp b/benchmark/blas/blas_common.hpp index f36b7649ffc..fe0110f82fb 100644 --- a/benchmark/blas/blas_common.hpp +++ b/benchmark/blas/blas_common.hpp @@ -509,7 +509,8 @@ void apply_blas(const char* operation_name, std::shared_ptr exec, add_or_set_member(test_case["blas"][operation_name], "error", msg_value, allocator); } - std::cerr << "Error when processing test case " << test_case << "\n" + std::cerr << "Error when processing test case\n" + << test_case << "\n" << "what(): " << e.what() << std::endl; } } @@ -546,7 +547,7 @@ void run_blas_benchmarks(std::shared_ptr exec, continue; } if (do_print) { - std::clog << "Running test case: " << test_case << std::endl; + std::clog << "Running test case\n" << test_case << std::endl; } // annotate the test case auto test_case_range = annotate(describe(test_case)); diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp index ec7febf262f..b249293116b 100644 --- a/benchmark/conversions/conversions.cpp +++ b/benchmark/conversions/conversions.cpp @@ -103,7 +103,8 @@ void convert_matrix(const gko::LinOp* matrix_from, const char* format_to, add_or_set_member(test_case["conversions"][conversion_name], "error", msg_value, allocator); } - std::cerr << "Error when processing test case " << test_case << "\n" + std::cerr << "Error when processing test case\n" + << test_case << "\n" << "what(): " << e.what() << std::endl; } } @@ -150,7 +151,7 @@ int main(int argc, char* argv[]) } auto& conversion_case = test_case["conversions"]; - std::clog << "Running test case: " << test_case << std::endl; + std::clog << "Running test case\n" << test_case << std::endl; gko::matrix_data data; try { data = generator.generate_matrix_data(test_case); diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index 45f21ca1e35..09cae6a7554 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -197,7 +197,7 @@ int main(int argc, char* argv[]) } auto& problem = test_case["problem"]; - std::clog << "Running test case: " << test_case << std::endl; + std::clog << "Running test case\n" << test_case << std::endl; auto matrix = DefaultSystemGenerator::generate_matrix_data( diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 281e64ddd76..d125b46bb34 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -248,7 +248,8 @@ void run_preconditioner(const char* precond_name, add_or_set_member(test_case["preconditioner"][encoded_name.c_str()], "error", msg_value, allocator); } - std::cerr << "Error when processing test case " << test_case << "\n" + std::cerr << "Error when processing test case\n" + << test_case << "\n" << "what(): " << 
e.what() << std::endl; } } @@ -310,7 +311,7 @@ int main(int argc, char* argv[]) })) { continue; } - std::clog << "Running test case: " << test_case << std::endl; + std::clog << "Running test case\n" << test_case << std::endl; // annotate the test case auto test_case_range = diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index 64190f8d968..ae9ae6dc1fb 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -587,7 +587,8 @@ void solve_system(const std::string& solver_name, add_or_set_member(test_case["solver"][precond_solver_name], "error", msg_value, allocator); } - std::cerr << "Error when processing test case " << test_case << "\n" + std::cerr << "Error when processing test case\n" + << test_case << "\n" << "what(): " << e.what() << std::endl; } } @@ -638,7 +639,7 @@ void run_solver_benchmarks(std::shared_ptr exec, annotate(system_generator.describe_config(test_case)); if (do_print) { - std::clog << "Running test case: " << test_case << std::endl; + std::clog << "Running test case\n" << test_case << std::endl; } using Vec = typename SystemGenerator::Vec; diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index 3b0ce26db5f..cfa56ef81fe 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -145,7 +145,8 @@ void apply_sparse_blas(const char* operation_name, add_or_set_member(test_case[operation_name], "error", msg_value, allocator); } - std::cerr << "Error when processing test case " << test_case << "\n" + std::cerr << "Error when processing test case\n" + << test_case << "\n" << "what(): " << e.what() << std::endl; } } @@ -192,7 +193,7 @@ int main(int argc, char* argv[]) allocator); } auto& sp_blas_case = test_case[benchmark_name]; - std::clog << "Running test case: " << test_case << std::endl; + std::clog << "Running test case\n" << test_case << std::endl; auto data = generator.generate_matrix_data(test_case); data.ensure_row_major_order(); std::clog << "Matrix is of size (" << data.size[0] << ", " diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index 4c40f1b9a7b..3c8d886df3b 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -146,7 +146,8 @@ void apply_spmv(const char* format_name, std::shared_ptr exec, add_or_set_member(test_case["spmv"][format_name], "error", msg_value, allocator); } - std::cerr << "Error when processing test case " << test_case << "\n" + std::cerr << "Error when processing test case\n" + << test_case << "\n" << "what(): " << e.what() << std::endl; } } @@ -184,7 +185,7 @@ void run_spmv_benchmark(std::shared_ptr exec, continue; } if (do_print) { - std::clog << "Running test case: " << test_case << std::endl; + std::clog << "Running test case\n" << test_case << std::endl; } // annotate the test case auto test_case_range = From d82282bfc827508fc0220657e6c050cdab3d1a86 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 15 Apr 2023 17:15:08 +0200 Subject: [PATCH 024/583] simplify run_all_benchmarks copy --- benchmark/CMakeLists.txt | 13 +------------ benchmark/run_all_benchmarks.sh | 0 2 files changed, 1 insertion(+), 12 deletions(-) mode change 100644 => 100755 benchmark/run_all_benchmarks.sh diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 040356f1666..72647928185 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -161,21 +161,10 @@ add_subdirectory(sparse_blas) add_subdirectory(spmv) add_subdirectory(tools) 
-add_custom_target(make_run_all_benchmarks ALL) -file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/run_all_benchmarks.sh - DESTINATION ${CMAKE_CURRENT_BINARY_DIR} - FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE - WORLD_READ WORLD_EXECUTE) - -add_custom_command( - TARGET make_run_all_benchmarks POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy - ${CMAKE_CURRENT_SOURCE_DIR}/run_all_benchmarks.sh - ${CMAKE_CURRENT_BINARY_DIR}/run_all_benchmarks.sh) +configure_file(run_all_benchmarks.sh run_all_benchmarks.sh COPYONLY) add_custom_target(benchmark) add_custom_command( TARGET benchmark POST_BUILD COMMAND bash run_all_benchmarks.sh >/dev/null - DEPENDS make_run_all_benchmarks WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/benchmark/run_all_benchmarks.sh b/benchmark/run_all_benchmarks.sh old mode 100644 new mode 100755 From d1a82974199396f8da494a9f24cfd0ef998de54a Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 15 Apr 2023 14:25:29 +0200 Subject: [PATCH 025/583] add test framework for benchmarks --- .gitignore | 3 + benchmark/CMakeLists.txt | 3 + benchmark/blas/distributed/CMakeLists.txt | 2 +- benchmark/conversions/CMakeLists.txt | 2 +- benchmark/solver/distributed/CMakeLists.txt | 2 +- benchmark/spmv/distributed/CMakeLists.txt | 2 +- benchmark/test/CMakeLists.txt | 28 + benchmark/test/blas.py | 25 + benchmark/test/conversion.py | 28 + benchmark/test/input.blas.json | 5 + benchmark/test/input.distributed_mtx.json | 7 + benchmark/test/input.distributed_solver.json | 10 + benchmark/test/input.mtx.json | 6 + benchmark/test/input.solver.json | 9 + benchmark/test/matrix_statistics.py | 18 + benchmark/test/multi_vector_distributed.py | 30 + benchmark/test/preconditioner.py | 23 + benchmark/test/reference/blas.profile.stderr | 130 ++ benchmark/test/reference/blas.profile.stdout | 29 + benchmark/test/reference/blas.simple.stderr | 76 + benchmark/test/reference/blas.simple.stdout | 29 + .../test/reference/conversion.all.stderr | 26 + .../test/reference/conversion.all.stdout | 77 + .../test/reference/conversion.profile.stderr | 153 ++ .../test/reference/conversion.profile.stdout | 32 + .../test/reference/conversion.simple.stderr | 17 + .../test/reference/conversion.simple.stdout | 32 + .../distributed_solver.profile.stderr | 1845 +++++++++++++++++ .../distributed_solver.profile.stdout | 64 + .../distributed_solver.simple.stderr | 19 + .../distributed_solver.simple.stdout | 65 + .../reference/matrix_statistics.simple.stderr | 9 + .../reference/matrix_statistics.simple.stdout | 38 + .../multi_vector_distributed.profile.stderr | 0 .../multi_vector_distributed.profile.stdout | 29 + .../multi_vector_distributed.simple.stderr | 86 + .../multi_vector_distributed.simple.stdout | 29 + .../reference/preconditioner.profile.stderr | 137 ++ .../reference/preconditioner.profile.stdout | 30 + .../reference/preconditioner.simple.stderr | 43 + .../reference/preconditioner.simple.stdout | 30 + .../test/reference/solver.profile.stderr | 1336 ++++++++++++ .../test/reference/solver.profile.stdout | 59 + benchmark/test/reference/solver.simple.stderr | 18 + benchmark/test/reference/solver.simple.stdout | 60 + .../test/reference/sparse_blas.profile.stderr | 104 + .../test/reference/sparse_blas.profile.stdout | 26 + .../test/reference/sparse_blas.simple.stderr | 38 + .../test/reference/sparse_blas.simple.stdout | 26 + benchmark/test/reference/spmv.profile.stderr | 178 ++ benchmark/test/reference/spmv.profile.stdout | 20 + benchmark/test/reference/spmv.simple.stderr | 32 + 
benchmark/test/reference/spmv.simple.stdout | 20 + .../reference/spmv_distributed.profile.stderr | 0 .../reference/spmv_distributed.profile.stdout | 21 + .../reference/spmv_distributed.simple.stderr | 34 + .../reference/spmv_distributed.simple.stdout | 21 + benchmark/test/solver.py | 23 + benchmark/test/solver_distributed.py | 24 + benchmark/test/sparse_blas.py | 23 + benchmark/test/spmv.py | 23 + benchmark/test/spmv_distributed.py | 28 + benchmark/test/test_framework.py.in | 120 ++ 63 files changed, 5458 insertions(+), 4 deletions(-) create mode 100644 benchmark/test/CMakeLists.txt create mode 100755 benchmark/test/blas.py create mode 100755 benchmark/test/conversion.py create mode 100644 benchmark/test/input.blas.json create mode 100644 benchmark/test/input.distributed_mtx.json create mode 100644 benchmark/test/input.distributed_solver.json create mode 100644 benchmark/test/input.mtx.json create mode 100644 benchmark/test/input.solver.json create mode 100755 benchmark/test/matrix_statistics.py create mode 100644 benchmark/test/multi_vector_distributed.py create mode 100755 benchmark/test/preconditioner.py create mode 100644 benchmark/test/reference/blas.profile.stderr create mode 100644 benchmark/test/reference/blas.profile.stdout create mode 100644 benchmark/test/reference/blas.simple.stderr create mode 100644 benchmark/test/reference/blas.simple.stdout create mode 100644 benchmark/test/reference/conversion.all.stderr create mode 100644 benchmark/test/reference/conversion.all.stdout create mode 100644 benchmark/test/reference/conversion.profile.stderr create mode 100644 benchmark/test/reference/conversion.profile.stdout create mode 100644 benchmark/test/reference/conversion.simple.stderr create mode 100644 benchmark/test/reference/conversion.simple.stdout create mode 100644 benchmark/test/reference/distributed_solver.profile.stderr create mode 100644 benchmark/test/reference/distributed_solver.profile.stdout create mode 100644 benchmark/test/reference/distributed_solver.simple.stderr create mode 100644 benchmark/test/reference/distributed_solver.simple.stdout create mode 100644 benchmark/test/reference/matrix_statistics.simple.stderr create mode 100644 benchmark/test/reference/matrix_statistics.simple.stdout create mode 100644 benchmark/test/reference/multi_vector_distributed.profile.stderr create mode 100644 benchmark/test/reference/multi_vector_distributed.profile.stdout create mode 100644 benchmark/test/reference/multi_vector_distributed.simple.stderr create mode 100644 benchmark/test/reference/multi_vector_distributed.simple.stdout create mode 100644 benchmark/test/reference/preconditioner.profile.stderr create mode 100644 benchmark/test/reference/preconditioner.profile.stdout create mode 100644 benchmark/test/reference/preconditioner.simple.stderr create mode 100644 benchmark/test/reference/preconditioner.simple.stdout create mode 100644 benchmark/test/reference/solver.profile.stderr create mode 100644 benchmark/test/reference/solver.profile.stdout create mode 100644 benchmark/test/reference/solver.simple.stderr create mode 100644 benchmark/test/reference/solver.simple.stdout create mode 100644 benchmark/test/reference/sparse_blas.profile.stderr create mode 100644 benchmark/test/reference/sparse_blas.profile.stdout create mode 100644 benchmark/test/reference/sparse_blas.simple.stderr create mode 100644 benchmark/test/reference/sparse_blas.simple.stdout create mode 100644 benchmark/test/reference/spmv.profile.stderr create mode 100644 benchmark/test/reference/spmv.profile.stdout 
create mode 100644 benchmark/test/reference/spmv.simple.stderr create mode 100644 benchmark/test/reference/spmv.simple.stdout create mode 100644 benchmark/test/reference/spmv_distributed.profile.stderr create mode 100644 benchmark/test/reference/spmv_distributed.profile.stdout create mode 100644 benchmark/test/reference/spmv_distributed.simple.stderr create mode 100644 benchmark/test/reference/spmv_distributed.simple.stdout create mode 100755 benchmark/test/solver.py create mode 100644 benchmark/test/solver_distributed.py create mode 100755 benchmark/test/sparse_blas.py create mode 100755 benchmark/test/spmv.py create mode 100644 benchmark/test/spmv_distributed.py create mode 100644 benchmark/test/test_framework.py.in diff --git a/.gitignore b/.gitignore index af0a88ef513..827f4025a2e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,9 @@ compile_commands.json CTestTestfile.cmake build +### Python +__pycache__ + ### IDE # Clion .idea diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 72647928185..434474fd336 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -160,6 +160,9 @@ add_subdirectory(solver) add_subdirectory(sparse_blas) add_subdirectory(spmv) add_subdirectory(tools) +if (GINKGO_BUILD_TESTS) + add_subdirectory(test) +endif() configure_file(run_all_benchmarks.sh run_all_benchmarks.sh COPYONLY) diff --git a/benchmark/blas/distributed/CMakeLists.txt b/benchmark/blas/distributed/CMakeLists.txt index 1371294efb8..a756b9c0071 100644 --- a/benchmark/blas/distributed/CMakeLists.txt +++ b/benchmark/blas/distributed/CMakeLists.txt @@ -1 +1 @@ -ginkgo_add_typed_benchmark_executables(multi-vector-distributed "NO" multi_vector.cpp) +ginkgo_add_typed_benchmark_executables(multi_vector_distributed "NO" multi_vector.cpp) diff --git a/benchmark/conversions/CMakeLists.txt b/benchmark/conversions/CMakeLists.txt index 0e0893c3aec..21dd363d3c0 100644 --- a/benchmark/conversions/CMakeLists.txt +++ b/benchmark/conversions/CMakeLists.txt @@ -1 +1 @@ -ginkgo_add_typed_benchmark_executables(conversions "NO" conversions.cpp) +ginkgo_add_typed_benchmark_executables(conversion "NO" conversions.cpp) diff --git a/benchmark/solver/distributed/CMakeLists.txt b/benchmark/solver/distributed/CMakeLists.txt index ca6586f1acf..5f6acd5a06c 100644 --- a/benchmark/solver/distributed/CMakeLists.txt +++ b/benchmark/solver/distributed/CMakeLists.txt @@ -1 +1 @@ -ginkgo_add_typed_benchmark_executables(solver-distributed "YES" solver.cpp) +ginkgo_add_typed_benchmark_executables(solver_distributed "YES" solver.cpp) diff --git a/benchmark/spmv/distributed/CMakeLists.txt b/benchmark/spmv/distributed/CMakeLists.txt index cadde3eea34..4322dd70e90 100644 --- a/benchmark/spmv/distributed/CMakeLists.txt +++ b/benchmark/spmv/distributed/CMakeLists.txt @@ -1 +1 @@ -ginkgo_add_typed_benchmark_executables(spmv-distributed "YES" spmv.cpp) +ginkgo_add_typed_benchmark_executables(spmv_distributed "YES" spmv.cpp) diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt new file mode 100644 index 00000000000..b3acaf3b709 --- /dev/null +++ b/benchmark/test/CMakeLists.txt @@ -0,0 +1,28 @@ +find_package(Python3 COMPONENTS Interpreter REQUIRED) +function(add_benchmark_test test_name) + configure_file(${test_name}.py ${test_name}.py COPYONLY) + add_test(NAME benchmark_${test_name} + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.py + WORKING_DIRECTORY "$") + set(regenerate_target benchmark_test_${test_name}_regenerate) + add_custom_target(${regenerate_target} + 
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.py --generate + COMMENT "Regenerating reference output for ${test_name}" + WORKING_DIRECTORY "$") + add_dependencies(${regenerate_target} ${test_name}) + add_dependencies(benchmark_test_regenerate ${regenerate_target}) +endfunction() +add_custom_target(benchmark_test_regenerate) +configure_file(test_framework.py.in test_framework.py @ONLY) +add_benchmark_test(blas) +add_benchmark_test(conversion) +add_benchmark_test(matrix_statistics) +add_benchmark_test(preconditioner) +add_benchmark_test(solver) +add_benchmark_test(sparse_blas) +add_benchmark_test(spmv) +if (GINKGO_BUILD_MPI) + add_benchmark_test(multi_vector_distributed) + add_benchmark_test(spmv_distributed) + add_benchmark_test(solver_distributed) +endif() \ No newline at end of file diff --git a/benchmark/test/blas.py b/benchmark/test/blas.py new file mode 100755 index 00000000000..16a423ba696 --- /dev/null +++ b/benchmark/test/blas.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +import test_framework +# check that all input modes work: +# parameter +test_framework.compare_output(["blas/blas", "-input", '[{"n": 100}]'], + expected_stdout="blas.simple.stdout", + expected_stderr="blas.simple.stderr") + +# stdin +test_framework.compare_output(["blas/blas"], + expected_stdout="blas.simple.stdout", + expected_stderr="blas.simple.stderr", + stdin='[{"n": 100}]') + +# file +test_framework.compare_output(["blas/blas", "-input", str(test_framework.sourcepath / "input.blas.json")], + expected_stdout="blas.simple.stdout", + expected_stderr="blas.simple.stderr", + stdin='[{"n": 100}]') + +# profiler annotations +test_framework.compare_output(["blas/blas", "-input", '[{"n": 100}]', '-profile', '-profiler_hook', 'debug'], + expected_stdout="blas.profile.stdout", + expected_stderr="blas.profile.stderr", + stdin='[{"n": 100}]') diff --git a/benchmark/test/conversion.py b/benchmark/test/conversion.py new file mode 100755 index 00000000000..1ef41c4a8ea --- /dev/null +++ b/benchmark/test/conversion.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import test_framework +# check that all input modes work: +# parameter +test_framework.compare_output(["conversion/conversion", "-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr"], + expected_stdout="conversion.simple.stdout", + expected_stderr="conversion.simple.stderr") + +# stdin +test_framework.compare_output(["conversion/conversion", "-formats", "coo,csr"], + expected_stdout="conversion.simple.stdout", + expected_stderr="conversion.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]') + +# input file +test_framework.compare_output(["conversion/conversion", "-input", str(test_framework.sourcepath / "input.mtx.json"), "-formats", "coo,csr"], + expected_stdout="conversion.simple.stdout", + expected_stderr="conversion.simple.stderr") + +# check that all conversions work +test_framework.compare_output(["conversion/conversion", "-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr,ell,sellp,hybrid"], + expected_stdout="conversion.all.stdout", + expected_stderr="conversion.all.stderr") + +# profiler annotations +test_framework.compare_output(["conversion/conversion", "-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr", '-profile', '-profiler_hook', 'debug'], + expected_stdout="conversion.profile.stdout", + expected_stderr="conversion.profile.stderr") diff --git a/benchmark/test/input.blas.json b/benchmark/test/input.blas.json new file mode 100644 index 00000000000..fe366aa6fa0 --- /dev/null +++ 
b/benchmark/test/input.blas.json @@ -0,0 +1,5 @@ +[ + { + "n": 100 + } +] \ No newline at end of file diff --git a/benchmark/test/input.distributed_mtx.json b/benchmark/test/input.distributed_mtx.json new file mode 100644 index 00000000000..aca115179e6 --- /dev/null +++ b/benchmark/test/input.distributed_mtx.json @@ -0,0 +1,7 @@ +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil" + } +] \ No newline at end of file diff --git a/benchmark/test/input.distributed_solver.json b/benchmark/test/input.distributed_solver.json new file mode 100644 index 00000000000..16efbf03fba --- /dev/null +++ b/benchmark/test/input.distributed_solver.json @@ -0,0 +1,10 @@ +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "optimal": { + "spmv": "csr-csr" + } + } +] \ No newline at end of file diff --git a/benchmark/test/input.mtx.json b/benchmark/test/input.mtx.json new file mode 100644 index 00000000000..fdeb10c8eee --- /dev/null +++ b/benchmark/test/input.mtx.json @@ -0,0 +1,6 @@ +[ + { + "size": 100, + "stencil": "7pt" + } +] \ No newline at end of file diff --git a/benchmark/test/input.solver.json b/benchmark/test/input.solver.json new file mode 100644 index 00000000000..0183700dfe8 --- /dev/null +++ b/benchmark/test/input.solver.json @@ -0,0 +1,9 @@ +[ + { + "size": 100, + "stencil": "7pt", + "optimal": { + "spmv": "csr" + } + } +] \ No newline at end of file diff --git a/benchmark/test/matrix_statistics.py b/benchmark/test/matrix_statistics.py new file mode 100755 index 00000000000..d350c94fae5 --- /dev/null +++ b/benchmark/test/matrix_statistics.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +import test_framework +# check that all input modes work: +# parameter +test_framework.compare_output(["matrix_statistics/matrix_statistics", "-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="matrix_statistics.simple.stdout", + expected_stderr="matrix_statistics.simple.stderr") + +# stdin +test_framework.compare_output(["matrix_statistics/matrix_statistics"], + expected_stdout="matrix_statistics.simple.stdout", + expected_stderr="matrix_statistics.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]') + +# input file +test_framework.compare_output(["matrix_statistics/matrix_statistics", "-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="matrix_statistics.simple.stdout", + expected_stderr="matrix_statistics.simple.stderr") diff --git a/benchmark/test/multi_vector_distributed.py b/benchmark/test/multi_vector_distributed.py new file mode 100644 index 00000000000..bc039a1b9fe --- /dev/null +++ b/benchmark/test/multi_vector_distributed.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +import test_framework +base_flags = ["blas/distributed/multi_vector_distributed"] +# check that all input modes work: +# parameter +test_framework.compare_output_distributed(base_flags + ["-input", '[{"n": 100}]'], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + num_procs=3) + +# stdin +test_framework.compare_output_distributed(base_flags, + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + stdin='[{"n": 100}]', + num_procs=3) + +# file +test_framework.compare_output_distributed(base_flags + ["-input", str(test_framework.sourcepath / "input.blas.json")], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + stdin='[{"n": 100}]', + num_procs=3) + +# profiler 
annotations +test_framework.compare_output_distributed(base_flags + ["-input", '[{"n": 100}]', '-profile', '-profiler_hook', 'debug'], + expected_stdout="multi_vector_distributed.profile.stdout", + expected_stderr="multi_vector_distributed.profile.stderr", + stdin='[{"n": 100}]', + num_procs=3) diff --git a/benchmark/test/preconditioner.py b/benchmark/test/preconditioner.py new file mode 100755 index 00000000000..67266e78324 --- /dev/null +++ b/benchmark/test/preconditioner.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +import test_framework +# check that all input modes work: +# parameter +test_framework.compare_output(["preconditioner/preconditioner", "-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="preconditioner.simple.stdout", + expected_stderr="preconditioner.simple.stderr") + +# stdin +test_framework.compare_output(["preconditioner/preconditioner"], + expected_stdout="preconditioner.simple.stdout", + expected_stderr="preconditioner.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]') + +# input file +test_framework.compare_output(["preconditioner/preconditioner", "-input", str(test_framework.sourcepath / "input.mtx.json")], + expected_stdout="preconditioner.simple.stdout", + expected_stderr="preconditioner.simple.stderr") + +# profiler annotations +test_framework.compare_output(["preconditioner/preconditioner", "-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], + expected_stdout="preconditioner.profile.stdout", + expected_stderr="preconditioner.profile.stderr") diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr new file mode 100644 index 00000000000..1fb7d5b93bc --- /dev/null +++ b/benchmark/test/reference/blas.profile.stderr @@ -0,0 +1,130 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scalRunning test case +{ + "n": 100, + "blas": {} +} +DEBUG: begin n = 100 +DEBUG: begin copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end copy +Current state: +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] +DEBUG: begin axpy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end axpy +Current state: +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] +DEBUG: begin scal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::scale 
+DEBUG: end dense::scale +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end scal +Current state: +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] +DEBUG: end n = 100 diff --git a/benchmark/test/reference/blas.profile.stdout b/benchmark/test/reference/blas.profile.stdout new file mode 100644 index 00000000000..3a2e7e54f80 --- /dev/null +++ b/benchmark/test/reference/blas.profile.stdout @@ -0,0 +1,29 @@ + +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/blas.simple.stderr b/benchmark/test/reference/blas.simple.stderr new file mode 100644 index 00000000000..e9b186e1353 --- /dev/null +++ b/benchmark/test/reference/blas.simple.stderr @@ -0,0 +1,76 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scalRunning test case +{ + "n": 100, + "blas": {} +} +Current state: +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Current state: +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Current state: +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/blas.simple.stdout b/benchmark/test/reference/blas.simple.stdout new file mode 100644 index 00000000000..08e692727fe --- /dev/null +++ b/benchmark/test/reference/blas.simple.stdout @@ -0,0 +1,29 @@ + +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr new file mode 100644 index 00000000000..dbc5720527c --- /dev/null +++ b/benchmark/test/reference/conversion.all.stderr @@ -0,0 +1,26 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are 
coo,csr,ell,sellp,hybrid +Running test case +{ + "size": 100, + "stencil": "7pt", + "conversion": {} +} +Matrix is of size (125, 125), 725 + Running conversion: coo-read + Running conversion: coo-csr + Running conversion: csr-read + Running conversion: csr-coo + Running conversion: csr-ell + Running conversion: csr-sellp + Running conversion: csr-hybrid + Running conversion: ell-read + Running conversion: ell-csr + Running conversion: sellp-read + Running conversion: sellp-csr + Running conversion: hybrid-read + Running conversion: hybrid-csr diff --git a/benchmark/test/reference/conversion.all.stdout b/benchmark/test/reference/conversion.all.stdout new file mode 100644 index 00000000000..c4b657a42c4 --- /dev/null +++ b/benchmark/test/reference/conversion.all.stdout @@ -0,0 +1,77 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "conversion": { + "coo-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "hybrid-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "hybrid-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr new file mode 100644 index 00000000000..6fc5cde206e --- /dev/null +++ b/benchmark/test/reference/conversion.profile.stderr @@ -0,0 +1,153 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The formats are coo,csr +Running test case +{ + "size": 100, + "stencil": "7pt", + "conversion": {} +} +Matrix is of size (125, 125), 725 +DEBUG: begin stencil(100,7pt) + Running conversion: coo-read +DEBUG: begin coo-read +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end coo-read + Running conversion: coo-csr +DEBUG: begin coo-csr +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy(gko::matrix::Coo,gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate 
+DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: end copy(gko::matrix::Coo,gko::matrix::Csr) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end coo-csr + Running conversion: csr-read +DEBUG: begin csr-read +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end csr-read + Running conversion: csr-coo +DEBUG: begin csr-coo +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::matrix::Csr,gko::matrix::Coo) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::convert_ptrs_to_idxs +DEBUG: end components::convert_ptrs_to_idxs +DEBUG: end copy(gko::matrix::Csr,gko::matrix::Coo) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end csr-coo +DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/conversion.profile.stdout b/benchmark/test/reference/conversion.profile.stdout new file mode 100644 index 00000000000..b29815f6c17 --- /dev/null +++ b/benchmark/test/reference/conversion.profile.stdout @@ -0,0 +1,32 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "conversion": { + "coo-read": { + "time": 1.0, + "repetitions": 1, + "completed": true + }, + "coo-csr": { + "time": 1.0, + "repetitions": 1, + "completed": true + }, + "csr-read": { + "time": 1.0, + "repetitions": 1, + "completed": true + }, + "csr-coo": { + "time": 1.0, + "repetitions": 1, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/conversion.simple.stderr b/benchmark/test/reference/conversion.simple.stderr new file mode 100644 index 00000000000..1e4dbc4bd51 --- /dev/null +++ b/benchmark/test/reference/conversion.simple.stderr @@ -0,0 +1,17 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are 
coo,csr +Running test case +{ + "size": 100, + "stencil": "7pt", + "conversion": {} +} +Matrix is of size (125, 125), 725 + Running conversion: coo-read + Running conversion: coo-csr + Running conversion: csr-read + Running conversion: csr-coo diff --git a/benchmark/test/reference/conversion.simple.stdout b/benchmark/test/reference/conversion.simple.stdout new file mode 100644 index 00000000000..856f1330eea --- /dev/null +++ b/benchmark/test/reference/conversion.simple.stdout @@ -0,0 +1,32 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "conversion": { + "coo-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr new file mode 100644 index 00000000000..64b09a754c3 --- /dev/null +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -0,0 +1,1845 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running cg with 1000 iterations and residual goal of 1.000000e-06 +The number of right hand sides is 1 +DEBUG: begin stencil(100,7pt,stencil) +Running test case +{ + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "optimal": { + "spmv": "csr-csr" + }, + "solver": {} +} +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: begin 
allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +Matrix is of size (125, 125) +DEBUG: begin cg + Running solver: cg +DEBUG: begin none +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end 
copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin generate(gko::solver::Cg::Factory) +DEBUG: begin generate(gko::matrix::IdentityFactory) +DEBUG: end generate(gko::matrix::IdentityFactory) +DEBUG: end generate(gko::solver::Cg::Factory) +DEBUG: begin copy(gko::matrix::Identity,gko::matrix::Identity) +DEBUG: end copy(gko::matrix::Identity,gko::matrix::Identity) +DEBUG: begin apply(gko::solver::Cg) +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin cg::initialize +DEBUG: end cg::initialize +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin 
apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch 
+DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin 
residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end iteration +DEBUG: end 
apply(gko::solver::Cg) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin free +DEBUG: end free +DEBUG: begin apply(gko::solver::Cg) +DEBUG: begin iteration +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin cg::initialize +DEBUG: end cg::initialize +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 
+DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end 
cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end 
advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy 
+DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end 
dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate 
+DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: 
end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end iteration +DEBUG: end apply(gko::solver::Cg) +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin free +DEBUG: end free +DEBUG: begin generate(gko::solver::Cg::Factory) +DEBUG: begin generate(gko::matrix::IdentityFactory) +DEBUG: end generate(gko::matrix::IdentityFactory) +DEBUG: end generate(gko::solver::Cg::Factory) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin apply(gko::solver::Cg) +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin cg::initialize +DEBUG: end cg::initialize +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin 
advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end 
apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end 
dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin 
check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end iteration +DEBUG: end apply(gko::solver::Cg) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: 
begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end none +DEBUG: end cg +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end stencil(100,7pt,stencil) diff --git a/benchmark/test/reference/distributed_solver.profile.stdout b/benchmark/test/reference/distributed_solver.profile.stdout new file mode 100644 index 00000000000..16dc6741930 --- /dev/null +++ b/benchmark/test/reference/distributed_solver.profile.stdout @@ -0,0 +1,64 @@ + +[ + { + "size": 125, + "stencil": "7pt", + "comm_pattern": "stencil", + "optimal": { + "spmv": "csr-csr" + }, + "solver": { + "cg": { + "recurrent_residuals": [], + "true_residuals": [], + "implicit_residuals": [], + "iteration_timestamps": [], + "rhs_norm": 1.0, + "generate": { + "components": { + "generate(gko::solver::Cg::Factory)": 1.0, + "generate(gko::matrix::IdentityFactory)": 1.0, + "overhead": 1.0 + }, + "time": 1.0 + }, + "apply": { + "components": { + "apply(gko::solver::Cg)": 1.0, + "iteration": 1.0, + "allocate": 1.0, + "dense::fill": 1.0, + "cg::initialize": 1.0, + "advanced_apply(gko::experimental::distributed::Matrix)": 1.0, + "dense::row_gather": 1.0, + "advanced_apply(gko::matrix::Csr)": 1.0, + "csr::advanced_spmv": 1.0, + "dense::compute_squared_norm2": 1.0, + "dense::compute_sqrt": 1.0, + "apply(gko::matrix::Identity)": 1.0, + "copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector)": 1.0, + "dense::copy": 1.0, + "dense::compute_conj_dot_dispatch": 1.0, + "check(gko::stop::Combined)": 1.0, + "check(gko::stop::ResidualNorm)": 1.0, + "residual_norm::residual_norm": 1.0, + "check(gko::stop::Iteration)": 1.0, + "cg::step_1": 1.0, + "apply(gko::experimental::distributed::Matrix)": 1.0, + "apply(gko::matrix::Csr)": 1.0, + "csr::spmv": 1.0, + "cg::step_2": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "iterations": 7, + "time": 1.0 + }, + "preconditioner": {}, + "residual_norm": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/distributed_solver.simple.stderr b/benchmark/test/reference/distributed_solver.simple.stderr new file mode 100644 index 00000000000..7800bb0b97e --- /dev/null +++ b/benchmark/test/reference/distributed_solver.simple.stderr @@ -0,0 +1,19 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running cg with 1000 iterations and residual goal of 1.000000e-06 +The number of right hand sides is 1 +Running test case +{ + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "optimal": { + "spmv": "csr-csr" + }, + "solver": {} +} +Matrix is of size (125, 125) + Running solver: cg diff --git a/benchmark/test/reference/distributed_solver.simple.stdout b/benchmark/test/reference/distributed_solver.simple.stdout new file mode 100644 index 00000000000..96ef102f8b8 --- /dev/null +++ b/benchmark/test/reference/distributed_solver.simple.stdout @@ -0,0 +1,65 @@ + +[ + { + "size": 125, + "stencil": "7pt", + "comm_pattern": "stencil", + 
"optimal": { + "spmv": "csr-csr" + }, + "solver": { + "cg": { + "recurrent_residuals": [], + "true_residuals": [], + "implicit_residuals": [], + "iteration_timestamps": [], + "rhs_norm": 1.0, + "generate": { + "components": { + "generate(gko::solver::Cg::Factory)": 1.0, + "generate(gko::matrix::IdentityFactory)": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "time": 1.0 + }, + "apply": { + "components": { + "apply(gko::solver::Cg)": 1.0, + "iteration": 1.0, + "allocate": 1.0, + "dense::fill": 1.0, + "cg::initialize": 1.0, + "advanced_apply(gko::experimental::distributed::Matrix)": 1.0, + "dense::row_gather": 1.0, + "advanced_apply(gko::matrix::Csr)": 1.0, + "csr::advanced_spmv": 1.0, + "dense::compute_squared_norm2": 1.0, + "dense::compute_sqrt": 1.0, + "apply(gko::matrix::Identity)": 1.0, + "copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector)": 1.0, + "dense::copy": 1.0, + "dense::compute_conj_dot_dispatch": 1.0, + "check(gko::stop::Combined)": 1.0, + "check(gko::stop::ResidualNorm)": 1.0, + "residual_norm::residual_norm": 1.0, + "check(gko::stop::Iteration)": 1.0, + "cg::step_1": 1.0, + "apply(gko::experimental::distributed::Matrix)": 1.0, + "apply(gko::matrix::Csr)": 1.0, + "csr::spmv": 1.0, + "cg::step_2": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "iterations": 7, + "time": 1.0 + }, + "preconditioner": {}, + "residual_norm": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr b/benchmark/test/reference/matrix_statistics.simple.stderr new file mode 100644 index 00000000000..e77cd5d413a --- /dev/null +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -0,0 +1,9 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running test case +{ + "size": 100, + "stencil": "7pt", + "problem": {} +} +Matrix is of size (125, 125) diff --git a/benchmark/test/reference/matrix_statistics.simple.stdout b/benchmark/test/reference/matrix_statistics.simple.stdout new file mode 100644 index 00000000000..4470784e7c5 --- /dev/null +++ b/benchmark/test/reference/matrix_statistics.simple.stdout @@ -0,0 +1,38 @@ + +[ + { + "size": 125, + "stencil": "7pt", + "problem": { + "rows": 125, + "columns": 125, + "nonzeros": 725, + "row_distribution": { + "min": 4, + "q1": 5.0, + "median": 6.0, + "q3": 6.0, + "max": 7, + "mean": 5.8, + "variance": 0.7199999999999992, + "skewness": -0.23570226039551892, + "kurtosis": 2.388888888888889, + "hyperskewness": -1.741577812922432, + "hyperflatness": 7.762345679012379 + }, + "col_distribution": { + "min": 4, + "q1": 5.0, + "median": 6.0, + "q3": 6.0, + "max": 7, + "mean": 5.8, + "variance": 0.7199999999999992, + "skewness": -0.23570226039551892, + "kurtosis": 2.388888888888889, + "hyperskewness": -1.741577812922432, + "hyperflatness": 7.762345679012379 + } + } + } +] diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr new file mode 100644 index 00000000000..e69de29bb2d diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stdout b/benchmark/test/reference/multi_vector_distributed.profile.stdout new file mode 100644 index 00000000000..3a2e7e54f80 --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.profile.stdout @@ -0,0 +1,29 @@ + +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 
1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stderr b/benchmark/test/reference/multi_vector_distributed.simple.stderr new file mode 100644 index 00000000000..23f3554e9c4 --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.simple.stderr @@ -0,0 +1,86 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scalThis is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scalThis is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scalRunning test case +{ + "n": 100, + "blas": {} +} +Current state: +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Current state: +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Current state: +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stdout b/benchmark/test/reference/multi_vector_distributed.simple.stdout new file mode 100644 index 00000000000..08e692727fe --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.simple.stdout @@ -0,0 +1,29 @@ + +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr new file mode 100644 index 00000000000..97341459e69 --- /dev/null +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -0,0 +1,137 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running with preconditioners: none +Running test case +{ + "size": 100, + "stencil": "7pt", + "preconditioner": {} +} +DEBUG: begin stencil(100,7pt) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate 
+DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +Matrix is of size (125, 125) +DEBUG: begin none +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin generate(gko::matrix::IdentityFactory) +DEBUG: end generate(gko::matrix::IdentityFactory) +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin generate(gko::matrix::IdentityFactory) +DEBUG: end generate(gko::matrix::IdentityFactory) +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin free +DEBUG: end free +DEBUG: end none +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "preconditioner": { + "none": { + "generate": { + "components": { + "generate(gko::matrix::IdentityFactory)": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 1 + }, + "apply": { + "components": { + "apply(gko::matrix::Identity)": 1.0, + "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "dense::copy": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 1 + }, + "completed": true + } + } + } +] +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/preconditioner.profile.stdout b/benchmark/test/reference/preconditioner.profile.stdout new file mode 100644 index 00000000000..c775fd61285 --- /dev/null +++ b/benchmark/test/reference/preconditioner.profile.stdout @@ -0,0 +1,30 @@ + +[ + { + "size": 125, + "stencil": "7pt", + "preconditioner": { + "none": { + "generate": { + "components": { + "generate(gko::matrix::IdentityFactory)": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 1 + }, + "apply": { + "components": { + 
"apply(gko::matrix::Identity)": 1.0, + "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "dense::copy": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 1 + }, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/preconditioner.simple.stderr b/benchmark/test/reference/preconditioner.simple.stderr new file mode 100644 index 00000000000..4a7ee9498d5 --- /dev/null +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -0,0 +1,43 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +Running with preconditioners: none +Running test case +{ + "size": 100, + "stencil": "7pt", + "preconditioner": {} +} +Matrix is of size (125, 125) +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "preconditioner": { + "none": { + "generate": { + "components": { + "generate(gko::matrix::IdentityFactory)": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "apply": { + "components": { + "apply(gko::matrix::Identity)": 1.0, + "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "dense::copy": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/preconditioner.simple.stdout b/benchmark/test/reference/preconditioner.simple.stdout new file mode 100644 index 00000000000..84100628d73 --- /dev/null +++ b/benchmark/test/reference/preconditioner.simple.stdout @@ -0,0 +1,30 @@ + +[ + { + "size": 125, + "stencil": "7pt", + "preconditioner": { + "none": { + "generate": { + "components": { + "generate(gko::matrix::IdentityFactory)": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "apply": { + "components": { + "apply(gko::matrix::Identity)": 1.0, + "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "dense::copy": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr new file mode 100644 index 00000000000..e50ab7f27b3 --- /dev/null +++ b/benchmark/test/reference/solver.profile.stderr @@ -0,0 +1,1336 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running cg with 1000 iterations and residual goal of 1.000000e-06 +The number of right hand sides is 1 +DEBUG: begin stencil(100,7pt) +Running test case +{ + "size": 100, + "stencil": "7pt", + "optimal": { + "spmv": "csr" + }, + "solver": {} +} +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end 
dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +Matrix is of size (125, 125) +DEBUG: begin cg + Running solver: cg +DEBUG: begin none +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin generate(gko::solver::Cg::Factory) +DEBUG: begin generate(gko::matrix::IdentityFactory) +DEBUG: end generate(gko::matrix::IdentityFactory) +DEBUG: end generate(gko::solver::Cg::Factory) +DEBUG: begin copy(gko::matrix::Identity,gko::matrix::Identity) +DEBUG: end copy(gko::matrix::Identity,gko::matrix::Identity) +DEBUG: begin apply(gko::solver::Cg) +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin cg::initialize +DEBUG: end cg::initialize +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin 
copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin 
dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) 
+DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end iteration +DEBUG: end apply(gko::solver::Cg) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin free +DEBUG: end free +DEBUG: begin apply(gko::solver::Cg) +DEBUG: begin iteration +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin cg::initialize +DEBUG: end cg::initialize +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin 
copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin 
dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: 
end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end 
dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin 
residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end iteration +DEBUG: end apply(gko::solver::Cg) +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin free +DEBUG: end free +DEBUG: begin generate(gko::solver::Cg::Factory) +DEBUG: begin generate(gko::matrix::IdentityFactory) +DEBUG: end generate(gko::matrix::IdentityFactory) +DEBUG: end generate(gko::solver::Cg::Factory) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin apply(gko::solver::Cg) +DEBUG: begin iteration +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin cg::initialize +DEBUG: end cg::initialize +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end 
apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) 
+DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: begin check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Iteration) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin cg::step_1 +DEBUG: end cg::step_1 +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin cg::step_2 +DEBUG: end cg::step_2 +DEBUG: begin apply(gko::matrix::Identity) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply(gko::matrix::Identity) +DEBUG: 
begin dense::compute_conj_dot_dispatch +DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: end iteration +DEBUG: begin iteration +DEBUG: begin check(gko::stop::Combined) +DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check(gko::stop::ResidualNorm) +DEBUG: end check(gko::stop::Combined) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end iteration +DEBUG: end apply(gko::solver::Cg) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end none +DEBUG: end cg +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/solver.profile.stdout b/benchmark/test/reference/solver.profile.stdout new file mode 100644 index 00000000000..a61b432ca0d --- /dev/null +++ b/benchmark/test/reference/solver.profile.stdout @@ -0,0 +1,59 @@ + +[ + { + "size": 125, + "stencil": "7pt", + "optimal": { + "spmv": "csr" + }, + "solver": { + "cg": { + "recurrent_residuals": [], + "true_residuals": [], + "implicit_residuals": [], + "iteration_timestamps": [], + "rhs_norm": 1.0, + "generate": { + "components": { + "generate(gko::solver::Cg::Factory)": 1.0, + "generate(gko::matrix::IdentityFactory)": 1.0, + "overhead": 1.0 + }, + "time": 1.0 + }, + "apply": { + "components": { + "apply(gko::solver::Cg)": 1.0, + "iteration": 1.0, + "allocate": 1.0, + "dense::fill": 1.0, + "cg::initialize": 1.0, + "advanced_apply(gko::matrix::Csr)": 1.0, + "csr::advanced_spmv": 1.0, + "dense::compute_norm2_dispatch": 1.0, + "apply(gko::matrix::Identity)": 1.0, + "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "dense::copy": 1.0, + "dense::compute_conj_dot_dispatch": 1.0, + "check(gko::stop::Combined)": 1.0, + "check(gko::stop::ResidualNorm)": 1.0, + "residual_norm::residual_norm": 1.0, + "check(gko::stop::Iteration)": 1.0, + "cg::step_1": 1.0, + "apply(gko::matrix::Csr)": 1.0, + "csr::spmv": 1.0, + "cg::step_2": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "iterations": 7, + "time": 1.0 + }, + 
"preconditioner": {}, + "residual_norm": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/solver.simple.stderr b/benchmark/test/reference/solver.simple.stderr new file mode 100644 index 00000000000..dad85f1c921 --- /dev/null +++ b/benchmark/test/reference/solver.simple.stderr @@ -0,0 +1,18 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running cg with 1000 iterations and residual goal of 1.000000e-06 +The number of right hand sides is 1 +Running test case +{ + "size": 100, + "stencil": "7pt", + "optimal": { + "spmv": "csr" + }, + "solver": {} +} +Matrix is of size (125, 125) + Running solver: cg diff --git a/benchmark/test/reference/solver.simple.stdout b/benchmark/test/reference/solver.simple.stdout new file mode 100644 index 00000000000..2e44c73fdfa --- /dev/null +++ b/benchmark/test/reference/solver.simple.stdout @@ -0,0 +1,60 @@ + +[ + { + "size": 125, + "stencil": "7pt", + "optimal": { + "spmv": "csr" + }, + "solver": { + "cg": { + "recurrent_residuals": [], + "true_residuals": [], + "implicit_residuals": [], + "iteration_timestamps": [], + "rhs_norm": 1.0, + "generate": { + "components": { + "generate(gko::solver::Cg::Factory)": 1.0, + "generate(gko::matrix::IdentityFactory)": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "time": 1.0 + }, + "apply": { + "components": { + "apply(gko::solver::Cg)": 1.0, + "iteration": 1.0, + "allocate": 1.0, + "dense::fill": 1.0, + "cg::initialize": 1.0, + "advanced_apply(gko::matrix::Csr)": 1.0, + "csr::advanced_spmv": 1.0, + "dense::compute_norm2_dispatch": 1.0, + "apply(gko::matrix::Identity)": 1.0, + "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "dense::copy": 1.0, + "dense::compute_conj_dot_dispatch": 1.0, + "check(gko::stop::Combined)": 1.0, + "check(gko::stop::ResidualNorm)": 1.0, + "residual_norm::residual_norm": 1.0, + "check(gko::stop::Iteration)": 1.0, + "cg::step_1": 1.0, + "apply(gko::matrix::Csr)": 1.0, + "csr::spmv": 1.0, + "cg::step_2": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "iterations": 7, + "time": 1.0 + }, + "preconditioner": {}, + "residual_norm": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr new file mode 100644 index 00000000000..02dfdfdacfd --- /dev/null +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -0,0 +1,104 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The operations are transposeRunning test case +{ + "size": 100, + "stencil": "7pt", + "sparse_blas": {} +} +DEBUG: begin stencil(100,7pt) +Matrix is of size (125, 125), 725 +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free 
+DEBUG: end free +DEBUG: begin transpose +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin csr::transpose +DEBUG: end csr::transpose +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin csr::transpose +DEBUG: end csr::transpose +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end transpose +Current state: +[ + { + "size": 100, + "stencil": "7pt", + "sparse_blas": { + "transpose": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "components": { + "allocate": 1.0, + "components::fill_array": 1.0, + "csr::transpose": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/sparse_blas.profile.stdout b/benchmark/test/reference/sparse_blas.profile.stdout new file mode 100644 index 00000000000..ba92c30298a --- /dev/null +++ b/benchmark/test/reference/sparse_blas.profile.stdout @@ -0,0 +1,26 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "sparse_blas": { + "transpose": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "components": { + "allocate": 1.0, + "components::fill_array": 1.0, + "csr::transpose": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/sparse_blas.simple.stderr b/benchmark/test/reference/sparse_blas.simple.stderr new file mode 100644 index 00000000000..a813994e739 --- /dev/null +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -0,0 +1,38 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are transposeRunning test case +{ + "size": 100, + "stencil": "7pt", + "sparse_blas": {} +} +Matrix is of size (125, 125), 725 +Current state: +[ + { + "size": 100, + "stencil": "7pt", + "sparse_blas": { + "transpose": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "components": { + "allocate": 1.0, + "components::fill_array": 1.0, + "csr::transpose": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/sparse_blas.simple.stdout b/benchmark/test/reference/sparse_blas.simple.stdout new file mode 100644 index 00000000000..f39300ca35b --- /dev/null +++ b/benchmark/test/reference/sparse_blas.simple.stdout @@ -0,0 +1,26 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "sparse_blas": { + "transpose": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "components": { + "allocate": 1.0, + "components::fill_array": 1.0, + "csr::transpose": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "completed": true + } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 + } 
+] diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr new file mode 100644 index 00000000000..3ddabd987ad --- /dev/null +++ b/benchmark/test/reference/spmv.profile.stderr @@ -0,0 +1,178 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The formats are coo +The number of right hand sides is 1 +Running test case +{ + "size": 100, + "stencil": "7pt", + "spmv": {} +} +DEBUG: begin stencil(100,7pt) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +Matrix is of size (125, 125) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin apply(gko::matrix::Coo) +DEBUG: begin coo::spmv +DEBUG: end coo::spmv +DEBUG: end apply(gko::matrix::Coo) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin coo +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply(gko::matrix::Coo) +DEBUG: begin coo::spmv +DEBUG: end coo::spmv +DEBUG: end apply(gko::matrix::Coo) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end 
copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply(gko::matrix::Coo) +DEBUG: begin coo::spmv +DEBUG: end coo::spmv +DEBUG: end apply(gko::matrix::Coo) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end coo +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "spmv": { + "coo": { + "storage": 11600, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 1, + "completed": true + } + }, + "nnz": 725, + "optimal": {} + } +] +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/spmv.profile.stdout b/benchmark/test/reference/spmv.profile.stdout new file mode 100644 index 00000000000..ec7309613b6 --- /dev/null +++ b/benchmark/test/reference/spmv.profile.stdout @@ -0,0 +1,20 @@ + +[ + { + "size": 125, + "stencil": "7pt", + "spmv": { + "coo": { + "storage": 11600, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 1, + "completed": true + } + }, + "nnz": 725, + "optimal": { + "spmv": "coo" + } + } +] diff --git a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr new file mode 100644 index 00000000000..8a2ebe9fe15 --- /dev/null +++ b/benchmark/test/reference/spmv.simple.stderr @@ -0,0 +1,32 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are coo +The number of right hand sides is 1 +Running test case +{ + "size": 100, + "stencil": "7pt", + "spmv": {} +} +Matrix is of size (125, 125) +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "spmv": { + "coo": { + "storage": 11600, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "nnz": 725, + "optimal": {} + } +] diff --git a/benchmark/test/reference/spmv.simple.stdout b/benchmark/test/reference/spmv.simple.stdout new file mode 100644 index 00000000000..90f8903a452 --- /dev/null +++ b/benchmark/test/reference/spmv.simple.stdout @@ -0,0 +1,20 @@ + +[ + { + "size": 125, + "stencil": "7pt", + "spmv": { + "coo": { + "storage": 11600, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "nnz": 725, + "optimal": { + "spmv": "coo" + } + } +] diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr new file mode 100644 index 00000000000..e69de29bb2d diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout b/benchmark/test/reference/spmv_distributed.profile.stdout new file mode 100644 index 00000000000..2aeeeb5b0d5 --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.profile.stdout @@ -0,0 +1,21 @@ + +[ + { + "size": 81, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 2316, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 1, + "completed": true + 
} + }, + "nnz": 135, + "optimal": { + "spmv": "csr-csr" + } + } +] diff --git a/benchmark/test/reference/spmv_distributed.simple.stderr b/benchmark/test/reference/spmv_distributed.simple.stderr new file mode 100644 index 00000000000..57f31d44686 --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.simple.stderr @@ -0,0 +1,34 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are [csr]x[csr] +The number of right hand sides is 1 +Running test case +{ + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": {} +} +Matrix is of size (81, 81) +Current state: +[ + { + "size": 81, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 2316, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "nnz": 135, + "optimal": {} + } +] diff --git a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout new file mode 100644 index 00000000000..d8cd32ba834 --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.simple.stdout @@ -0,0 +1,21 @@ + +[ + { + "size": 81, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 2316, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "nnz": 135, + "optimal": { + "spmv": "csr-csr" + } + } +] diff --git a/benchmark/test/solver.py b/benchmark/test/solver.py new file mode 100755 index 00000000000..afcbfde1a44 --- /dev/null +++ b/benchmark/test/solver.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +import test_framework +# check that all input modes work: +# parameter +test_framework.compare_output(["solver/solver", "-input", '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]'], + expected_stdout="solver.simple.stdout", + expected_stderr="solver.simple.stderr") + +# stdin +test_framework.compare_output(["solver/solver"], + expected_stdout="solver.simple.stdout", + expected_stderr="solver.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]') + +# input file +test_framework.compare_output(["solver/solver", "-input", str(test_framework.sourcepath / "input.solver.json")], + expected_stdout="solver.simple.stdout", + expected_stderr="solver.simple.stderr") + +# profiler annotations +test_framework.compare_output(["solver/solver", "-input", '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]', '-profile', '-profiler_hook', 'debug'], + expected_stdout="solver.profile.stdout", + expected_stderr="solver.profile.stderr") diff --git a/benchmark/test/solver_distributed.py b/benchmark/test/solver_distributed.py new file mode 100644 index 00000000000..c19e14718c2 --- /dev/null +++ b/benchmark/test/solver_distributed.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +import test_framework +base_flags = ["solver/distributed/solver_distributed"] +# check that all input modes work: +# parameter +test_framework.compare_output(base_flags + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]'], + expected_stdout="distributed_solver.simple.stdout", + expected_stderr="distributed_solver.simple.stderr") + +# stdin +test_framework.compare_output(base_flags, + expected_stdout="distributed_solver.simple.stdout", + expected_stderr="distributed_solver.simple.stderr", + stdin='[{"size": 100, 
"stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]') + +# input file +test_framework.compare_output(base_flags + ["-input", str(test_framework.sourcepath / "input.distributed_solver.json")], + expected_stdout="distributed_solver.simple.stdout", + expected_stderr="distributed_solver.simple.stderr") + +# profiler annotations +test_framework.compare_output(base_flags + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]', '-profile', '-profiler_hook', 'debug'], + expected_stdout="distributed_solver.profile.stdout", + expected_stderr="distributed_solver.profile.stderr") diff --git a/benchmark/test/sparse_blas.py b/benchmark/test/sparse_blas.py new file mode 100755 index 00000000000..94b3041ff96 --- /dev/null +++ b/benchmark/test/sparse_blas.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +import test_framework +# check that all input modes work: +# parameter +test_framework.compare_output(["sparse_blas/sparse_blas", "-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="sparse_blas.simple.stdout", + expected_stderr="sparse_blas.simple.stderr") + +# stdin +test_framework.compare_output(["sparse_blas/sparse_blas", "-operations", "transpose"], + expected_stdout="sparse_blas.simple.stdout", + expected_stderr="sparse_blas.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]') + +# input file +test_framework.compare_output(["sparse_blas/sparse_blas", "-operations", "transpose", "-input", str(test_framework.sourcepath / "input.mtx.json")], + expected_stdout="sparse_blas.simple.stdout", + expected_stderr="sparse_blas.simple.stderr") + +# profiler annotations (transpose has the smallest number of allocations) +test_framework.compare_output(["sparse_blas/sparse_blas", "-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], + expected_stdout="sparse_blas.profile.stdout", + expected_stderr="sparse_blas.profile.stderr") diff --git a/benchmark/test/spmv.py b/benchmark/test/spmv.py new file mode 100755 index 00000000000..718b34a2290 --- /dev/null +++ b/benchmark/test/spmv.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +import test_framework +# check that all input modes work: +# parameter +test_framework.compare_output(["spmv/spmv", "-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="spmv.simple.stdout", + expected_stderr="spmv.simple.stderr") + +# stdin +test_framework.compare_output(["spmv/spmv"], + expected_stdout="spmv.simple.stdout", + expected_stderr="spmv.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]') + +# input file +test_framework.compare_output(["spmv/spmv", "-input", str(test_framework.sourcepath / "input.mtx.json")], + expected_stdout="spmv.simple.stdout", + expected_stderr="spmv.simple.stderr") + +# profiler annotations +test_framework.compare_output(["spmv/spmv", "-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], + expected_stdout="spmv.profile.stdout", + expected_stderr="spmv.profile.stderr") diff --git a/benchmark/test/spmv_distributed.py b/benchmark/test/spmv_distributed.py new file mode 100644 index 00000000000..d74730d2f49 --- /dev/null +++ b/benchmark/test/spmv_distributed.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import test_framework +base_flags = ["spmv/distributed/spmv_distributed"] +# check that all input modes work: +# parameter +test_framework.compare_output_distributed(base_flags + ["-input", '[{"size": 100, "stencil": "7pt", 
"comm_pattern": "stencil"}]'], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3) + +# stdin +test_framework.compare_output_distributed(base_flags, + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, + stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]') + +# input file +test_framework.compare_output_distributed(base_flags + ["-input", str(test_framework.sourcepath / "input.distributed_mtx.json")], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3) + +# profiler annotations +test_framework.compare_output_distributed(base_flags + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', '-profile', '-profiler_hook', 'debug'], + expected_stdout="spmv_distributed.profile.stdout", + expected_stderr="spmv_distributed.profile.stderr", + num_procs=3) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in new file mode 100644 index 00000000000..2d42f3677e6 --- /dev/null +++ b/benchmark/test/test_framework.py.in @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +import subprocess +import difflib +import json +import typing +import re +import pathlib +import sys +sourcepath = pathlib.Path("@CMAKE_CURRENT_SOURCE_DIR@") +binpath = pathlib.Path("@PROJECT_BINARY_DIR@") +generate = False +if len(sys.argv) > 1 and sys.argv[1] == "--generate": + generate = True + + +denumberify_paths = ["time", "bandwidth", "flops", "components", + "residual_norm", "rhs_norm", "max_relative_norm2"] +empty_array_paths = ["recurrent_residuals", "true_residuals", + "implicit_residuals", "iteration_timestamps"] + + +def sanitize_json_single(key, value, sanitize_all): + if key in denumberify_paths and isinstance(value, float): + return 1.0 + if key in denumberify_paths and isinstance(value, typing.Dict): + return sanitize_json(value, True) + if key in empty_array_paths and isinstance(value, typing.List): + return [] + return sanitize_json(value, sanitize_all) + + +def sanitize_json(parsed_input, sanitize_all=False): + if isinstance(parsed_input, typing.Dict): + return {key: sanitize_json_single(key, value, sanitize_all) for key, value in parsed_input.items()} + elif isinstance(parsed_input, typing.List): + return [sanitize_json(e, sanitize_all) for e in parsed_input] + elif sanitize_all and isinstance(parsed_input, float): + return 1.0 + else: + return parsed_input + + +def sanitize_text(lines): + json_begins = [i for i, l in enumerate(lines) if l in ["[", "{"]] + json_ends = [i + 1 for i, l in enumerate(lines) if l in ["]", "}"]] + json_pairs = list(zip(json_begins, json_ends)) + if (len(json_pairs) == 0): + return lines + assert (all(begin < end for begin, end in json_pairs)) + nonjson_pairs = [(0, json_begins[0])] + list(zip(json_ends[:-1], + json_begins[1:])) + [(json_ends[-1], len(lines))] + combined_pairs = sorted([(begin, end, False) for begin, end in nonjson_pairs] + [ + (begin, end, True) for begin, end in json_pairs]) + texts = [("\n".join(lines[begin:end]), do_sanitize) + for begin, end, do_sanitize in combined_pairs] + reconstructed = [json.dumps(sanitize_json(json.loads( + t)), indent=4) if do_sanitize else t for t, do_sanitize in texts] + return "\n".join(reconstructed).split("\n") + + +def determinize_text(input, denumberify_paths=[], remove_paths=[], ignore_patterns=[]): + lines = input.split("\n") + output_lines = [] + patterns = [re.compile(pattern) for 
pattern in ignore_patterns] + for line in lines: + keep = True + for pattern in patterns: + if re.match(pattern, line): + keep = False + break + if keep: + output_lines.append(line) + return sanitize_text(output_lines) + + +def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_flags=[]): + args[0] = binpath / "benchmark" / args[0] + expected_stdout = sourcepath / "reference" / expected_stdout + expected_stderr = sourcepath / "reference" / expected_stderr + result = subprocess.run(args=launcher_flags + args, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, input=bytes(stdin, "utf-8")) + print("TEST: {}".format( + " ".join(["'{}'".format(arg) for arg in launcher_flags + args]))) + version_patterns = [ + " the .* module is", + ] + if generate: + open(expected_stdout, "w").write("\n".join(determinize_text( + result.stdout.decode()))) + open(expected_stderr, "w").write("\n".join(determinize_text(result.stderr.decode( + ), ignore_patterns=version_patterns))) + print("GENERATED") + return + result_stdout_processed = determinize_text( + result.stdout.decode()) + result_stderr_processed = determinize_text(result.stderr.decode( + ), ignore_patterns=version_patterns) + expected_stdout_processed = determinize_text( + open(expected_stdout).read()) + expected_stderr_processed = determinize_text(open(expected_stderr).read( + ), ignore_patterns=version_patterns) + failed = False + if result_stdout_processed != expected_stdout_processed: + print("FAIL: stdout differs") + print("\n".join(difflib.unified_diff( + expected_stdout_processed, result_stdout_processed))) + failed = True + if result_stderr_processed != expected_stderr_processed: + print("FAIL: stderr differs") + print("\n".join(difflib.unified_diff( + expected_stderr_processed, result_stderr_processed))) + failed = True + if failed: + exit(1) + print("PASS") + + +def compare_output_distributed(args, expected_stdout, expected_stderr, num_procs, stdin=""): + compare_output(args, expected_stdout, expected_stderr, stdin, [ + "@MPIEXEC_EXECUTABLE@", "@MPIEXEC_NUMPROC_FLAG@", str(num_procs)]) From 1a3af15487cce3dd95c000099a5407ea5740667a Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 18 Apr 2023 15:25:22 +0200 Subject: [PATCH 026/583] fix pathlib issue --- benchmark/test/test_framework.py.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 2d42f3677e6..56ff9ccbbb8 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -74,9 +74,9 @@ def determinize_text(input, denumberify_paths=[], remove_paths=[], ignore_patter def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_flags=[]): - args[0] = binpath / "benchmark" / args[0] - expected_stdout = sourcepath / "reference" / expected_stdout - expected_stderr = sourcepath / "reference" / expected_stderr + args[0] = str(binpath / "benchmark" / args[0]) + expected_stdout = str(sourcepath / "reference" / expected_stdout) + expected_stderr = str(sourcepath / "reference" / expected_stderr) result = subprocess.run(args=launcher_flags + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, input=bytes(stdin, "utf-8")) print("TEST: {}".format( From e605f2b397ecc17b3e584e0819a7b41ed5ca8be6 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 19 Apr 2023 09:58:22 +0200 Subject: [PATCH 027/583] fix benchmark tests for multi-config generators --- benchmark/test/CMakeLists.txt | 4 ++-- benchmark/test/blas.py | 8 ++++---- 
benchmark/test/conversion.py | 10 +++++----- benchmark/test/matrix_statistics.py | 6 +++--- benchmark/test/multi_vector_distributed.py | 9 ++++----- benchmark/test/preconditioner.py | 8 ++++---- benchmark/test/solver.py | 8 ++++---- benchmark/test/solver_distributed.py | 9 ++++----- benchmark/test/sparse_blas.py | 8 ++++---- benchmark/test/spmv.py | 8 ++++---- benchmark/test/spmv_distributed.py | 9 ++++----- benchmark/test/test_framework.py.in | 4 ++-- 12 files changed, 44 insertions(+), 47 deletions(-) diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt index b3acaf3b709..1cd589927fa 100644 --- a/benchmark/test/CMakeLists.txt +++ b/benchmark/test/CMakeLists.txt @@ -2,11 +2,11 @@ find_package(Python3 COMPONENTS Interpreter REQUIRED) function(add_benchmark_test test_name) configure_file(${test_name}.py ${test_name}.py COPYONLY) add_test(NAME benchmark_${test_name} - COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.py + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.py $ WORKING_DIRECTORY "$") set(regenerate_target benchmark_test_${test_name}_regenerate) add_custom_target(${regenerate_target} - COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.py --generate + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.py $ --generate COMMENT "Regenerating reference output for ${test_name}" WORKING_DIRECTORY "$") add_dependencies(${regenerate_target} ${test_name}) diff --git a/benchmark/test/blas.py b/benchmark/test/blas.py index 16a423ba696..e099718bae0 100755 --- a/benchmark/test/blas.py +++ b/benchmark/test/blas.py @@ -2,24 +2,24 @@ import test_framework # check that all input modes work: # parameter -test_framework.compare_output(["blas/blas", "-input", '[{"n": 100}]'], +test_framework.compare_output(["-input", '[{"n": 100}]'], expected_stdout="blas.simple.stdout", expected_stderr="blas.simple.stderr") # stdin -test_framework.compare_output(["blas/blas"], +test_framework.compare_output([], expected_stdout="blas.simple.stdout", expected_stderr="blas.simple.stderr", stdin='[{"n": 100}]') # file -test_framework.compare_output(["blas/blas", "-input", str(test_framework.sourcepath / "input.blas.json")], +test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.blas.json")], expected_stdout="blas.simple.stdout", expected_stderr="blas.simple.stderr", stdin='[{"n": 100}]') # profiler annotations -test_framework.compare_output(["blas/blas", "-input", '[{"n": 100}]', '-profile', '-profiler_hook', 'debug'], +test_framework.compare_output(["-input", '[{"n": 100}]', '-profile', '-profiler_hook', 'debug'], expected_stdout="blas.profile.stdout", expected_stderr="blas.profile.stderr", stdin='[{"n": 100}]') diff --git a/benchmark/test/conversion.py b/benchmark/test/conversion.py index 1ef41c4a8ea..91e71cc9e89 100755 --- a/benchmark/test/conversion.py +++ b/benchmark/test/conversion.py @@ -2,27 +2,27 @@ import test_framework # check that all input modes work: # parameter -test_framework.compare_output(["conversion/conversion", "-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr"], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr"], expected_stdout="conversion.simple.stdout", expected_stderr="conversion.simple.stderr") # stdin -test_framework.compare_output(["conversion/conversion", "-formats", "coo,csr"], +test_framework.compare_output(["-formats", "coo,csr"], expected_stdout="conversion.simple.stdout", 
expected_stderr="conversion.simple.stderr", stdin='[{"size": 100, "stencil": "7pt"}]') # input file -test_framework.compare_output(["conversion/conversion", "-input", str(test_framework.sourcepath / "input.mtx.json"), "-formats", "coo,csr"], +test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.mtx.json"), "-formats", "coo,csr"], expected_stdout="conversion.simple.stdout", expected_stderr="conversion.simple.stderr") # check that all conversions work -test_framework.compare_output(["conversion/conversion", "-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr,ell,sellp,hybrid"], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr,ell,sellp,hybrid"], expected_stdout="conversion.all.stdout", expected_stderr="conversion.all.stderr") # profiler annotations -test_framework.compare_output(["conversion/conversion", "-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr", '-profile', '-profiler_hook', 'debug'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr", '-profile', '-profiler_hook', 'debug'], expected_stdout="conversion.profile.stdout", expected_stderr="conversion.profile.stderr") diff --git a/benchmark/test/matrix_statistics.py b/benchmark/test/matrix_statistics.py index d350c94fae5..62547acfbeb 100755 --- a/benchmark/test/matrix_statistics.py +++ b/benchmark/test/matrix_statistics.py @@ -2,17 +2,17 @@ import test_framework # check that all input modes work: # parameter -test_framework.compare_output(["matrix_statistics/matrix_statistics", "-input", '[{"size": 100, "stencil": "7pt"}]'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]'], expected_stdout="matrix_statistics.simple.stdout", expected_stderr="matrix_statistics.simple.stderr") # stdin -test_framework.compare_output(["matrix_statistics/matrix_statistics"], +test_framework.compare_output([], expected_stdout="matrix_statistics.simple.stdout", expected_stderr="matrix_statistics.simple.stderr", stdin='[{"size": 100, "stencil": "7pt"}]') # input file -test_framework.compare_output(["matrix_statistics/matrix_statistics", "-input", '[{"size": 100, "stencil": "7pt"}]'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]'], expected_stdout="matrix_statistics.simple.stdout", expected_stderr="matrix_statistics.simple.stderr") diff --git a/benchmark/test/multi_vector_distributed.py b/benchmark/test/multi_vector_distributed.py index bc039a1b9fe..808a7c3e458 100644 --- a/benchmark/test/multi_vector_distributed.py +++ b/benchmark/test/multi_vector_distributed.py @@ -1,29 +1,28 @@ #!/usr/bin/env python3 import test_framework -base_flags = ["blas/distributed/multi_vector_distributed"] # check that all input modes work: # parameter -test_framework.compare_output_distributed(base_flags + ["-input", '[{"n": 100}]'], +test_framework.compare_output_distributed(["-input", '[{"n": 100}]'], expected_stdout="multi_vector_distributed.simple.stdout", expected_stderr="multi_vector_distributed.simple.stderr", num_procs=3) # stdin -test_framework.compare_output_distributed(base_flags, +test_framework.compare_output_distributed([], expected_stdout="multi_vector_distributed.simple.stdout", expected_stderr="multi_vector_distributed.simple.stderr", stdin='[{"n": 100}]', num_procs=3) # file -test_framework.compare_output_distributed(base_flags + ["-input", str(test_framework.sourcepath / "input.blas.json")], +test_framework.compare_output_distributed(["-input", 
str(test_framework.sourcepath / "input.blas.json")], expected_stdout="multi_vector_distributed.simple.stdout", expected_stderr="multi_vector_distributed.simple.stderr", stdin='[{"n": 100}]', num_procs=3) # profiler annotations -test_framework.compare_output_distributed(base_flags + ["-input", '[{"n": 100}]', '-profile', '-profiler_hook', 'debug'], +test_framework.compare_output_distributed(["-input", '[{"n": 100}]', '-profile', '-profiler_hook', 'debug'], expected_stdout="multi_vector_distributed.profile.stdout", expected_stderr="multi_vector_distributed.profile.stderr", stdin='[{"n": 100}]', diff --git a/benchmark/test/preconditioner.py b/benchmark/test/preconditioner.py index 67266e78324..4a044cd25f5 100755 --- a/benchmark/test/preconditioner.py +++ b/benchmark/test/preconditioner.py @@ -2,22 +2,22 @@ import test_framework # check that all input modes work: # parameter -test_framework.compare_output(["preconditioner/preconditioner", "-input", '[{"size": 100, "stencil": "7pt"}]'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]'], expected_stdout="preconditioner.simple.stdout", expected_stderr="preconditioner.simple.stderr") # stdin -test_framework.compare_output(["preconditioner/preconditioner"], +test_framework.compare_output([], expected_stdout="preconditioner.simple.stdout", expected_stderr="preconditioner.simple.stderr", stdin='[{"size": 100, "stencil": "7pt"}]') # input file -test_framework.compare_output(["preconditioner/preconditioner", "-input", str(test_framework.sourcepath / "input.mtx.json")], +test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.mtx.json")], expected_stdout="preconditioner.simple.stdout", expected_stderr="preconditioner.simple.stderr") # profiler annotations -test_framework.compare_output(["preconditioner/preconditioner", "-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], expected_stdout="preconditioner.profile.stdout", expected_stderr="preconditioner.profile.stderr") diff --git a/benchmark/test/solver.py b/benchmark/test/solver.py index afcbfde1a44..fd8130e0ae1 100755 --- a/benchmark/test/solver.py +++ b/benchmark/test/solver.py @@ -2,22 +2,22 @@ import test_framework # check that all input modes work: # parameter -test_framework.compare_output(["solver/solver", "-input", '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]'], expected_stdout="solver.simple.stdout", expected_stderr="solver.simple.stderr") # stdin -test_framework.compare_output(["solver/solver"], +test_framework.compare_output([], expected_stdout="solver.simple.stdout", expected_stderr="solver.simple.stderr", stdin='[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]') # input file -test_framework.compare_output(["solver/solver", "-input", str(test_framework.sourcepath / "input.solver.json")], +test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.solver.json")], expected_stdout="solver.simple.stdout", expected_stderr="solver.simple.stderr") # profiler annotations -test_framework.compare_output(["solver/solver", "-input", '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]', '-profile', '-profiler_hook', 'debug'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]', 
'-profile', '-profiler_hook', 'debug'], expected_stdout="solver.profile.stdout", expected_stderr="solver.profile.stderr") diff --git a/benchmark/test/solver_distributed.py b/benchmark/test/solver_distributed.py index c19e14718c2..f8a02861e26 100644 --- a/benchmark/test/solver_distributed.py +++ b/benchmark/test/solver_distributed.py @@ -1,24 +1,23 @@ #!/usr/bin/env python3 import test_framework -base_flags = ["solver/distributed/solver_distributed"] # check that all input modes work: # parameter -test_framework.compare_output(base_flags + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]'], expected_stdout="distributed_solver.simple.stdout", expected_stderr="distributed_solver.simple.stderr") # stdin -test_framework.compare_output(base_flags, +test_framework.compare_output([], expected_stdout="distributed_solver.simple.stdout", expected_stderr="distributed_solver.simple.stderr", stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]') # input file -test_framework.compare_output(base_flags + ["-input", str(test_framework.sourcepath / "input.distributed_solver.json")], +test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.distributed_solver.json")], expected_stdout="distributed_solver.simple.stdout", expected_stderr="distributed_solver.simple.stderr") # profiler annotations -test_framework.compare_output(base_flags + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]', '-profile', '-profiler_hook', 'debug'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]', '-profile', '-profiler_hook', 'debug'], expected_stdout="distributed_solver.profile.stdout", expected_stderr="distributed_solver.profile.stderr") diff --git a/benchmark/test/sparse_blas.py b/benchmark/test/sparse_blas.py index 94b3041ff96..913aac94d07 100755 --- a/benchmark/test/sparse_blas.py +++ b/benchmark/test/sparse_blas.py @@ -2,22 +2,22 @@ import test_framework # check that all input modes work: # parameter -test_framework.compare_output(["sparse_blas/sparse_blas", "-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]'], +test_framework.compare_output(["-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]'], expected_stdout="sparse_blas.simple.stdout", expected_stderr="sparse_blas.simple.stderr") # stdin -test_framework.compare_output(["sparse_blas/sparse_blas", "-operations", "transpose"], +test_framework.compare_output(["-operations", "transpose"], expected_stdout="sparse_blas.simple.stdout", expected_stderr="sparse_blas.simple.stderr", stdin='[{"size": 100, "stencil": "7pt"}]') # input file -test_framework.compare_output(["sparse_blas/sparse_blas", "-operations", "transpose", "-input", str(test_framework.sourcepath / "input.mtx.json")], +test_framework.compare_output(["-operations", "transpose", "-input", str(test_framework.sourcepath / "input.mtx.json")], expected_stdout="sparse_blas.simple.stdout", expected_stderr="sparse_blas.simple.stderr") # profiler annotations (transpose has the smallest number of allocations) -test_framework.compare_output(["sparse_blas/sparse_blas", "-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], 
+test_framework.compare_output(["-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], expected_stdout="sparse_blas.profile.stdout", expected_stderr="sparse_blas.profile.stderr") diff --git a/benchmark/test/spmv.py b/benchmark/test/spmv.py index 718b34a2290..d3f3015b9dd 100755 --- a/benchmark/test/spmv.py +++ b/benchmark/test/spmv.py @@ -2,22 +2,22 @@ import test_framework # check that all input modes work: # parameter -test_framework.compare_output(["spmv/spmv", "-input", '[{"size": 100, "stencil": "7pt"}]'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]'], expected_stdout="spmv.simple.stdout", expected_stderr="spmv.simple.stderr") # stdin -test_framework.compare_output(["spmv/spmv"], +test_framework.compare_output([], expected_stdout="spmv.simple.stdout", expected_stderr="spmv.simple.stderr", stdin='[{"size": 100, "stencil": "7pt"}]') # input file -test_framework.compare_output(["spmv/spmv", "-input", str(test_framework.sourcepath / "input.mtx.json")], +test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.mtx.json")], expected_stdout="spmv.simple.stdout", expected_stderr="spmv.simple.stderr") # profiler annotations -test_framework.compare_output(["spmv/spmv", "-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], +test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], expected_stdout="spmv.profile.stdout", expected_stderr="spmv.profile.stderr") diff --git a/benchmark/test/spmv_distributed.py b/benchmark/test/spmv_distributed.py index d74730d2f49..f6aa1accbe9 100644 --- a/benchmark/test/spmv_distributed.py +++ b/benchmark/test/spmv_distributed.py @@ -1,28 +1,27 @@ #!/usr/bin/env python3 import test_framework -base_flags = ["spmv/distributed/spmv_distributed"] # check that all input modes work: # parameter -test_framework.compare_output_distributed(base_flags + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]'], +test_framework.compare_output_distributed(["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]'], expected_stdout="spmv_distributed.simple.stdout", expected_stderr="spmv_distributed.simple.stderr", num_procs=3) # stdin -test_framework.compare_output_distributed(base_flags, +test_framework.compare_output_distributed([], expected_stdout="spmv_distributed.simple.stdout", expected_stderr="spmv_distributed.simple.stderr", num_procs=3, stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]') # input file -test_framework.compare_output_distributed(base_flags + ["-input", str(test_framework.sourcepath / "input.distributed_mtx.json")], +test_framework.compare_output_distributed(["-input", str(test_framework.sourcepath / "input.distributed_mtx.json")], expected_stdout="spmv_distributed.simple.stdout", expected_stderr="spmv_distributed.simple.stderr", num_procs=3) # profiler annotations -test_framework.compare_output_distributed(base_flags + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', '-profile', '-profiler_hook', 'debug'], +test_framework.compare_output_distributed(["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', '-profile', '-profiler_hook', 'debug'], expected_stdout="spmv_distributed.profile.stdout", expected_stderr="spmv_distributed.profile.stderr", num_procs=3) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 
56ff9ccbbb8..e53a35c30a8 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -9,7 +9,7 @@ import sys sourcepath = pathlib.Path("@CMAKE_CURRENT_SOURCE_DIR@") binpath = pathlib.Path("@PROJECT_BINARY_DIR@") generate = False -if len(sys.argv) > 1 and sys.argv[1] == "--generate": +if len(sys.argv) > 2 and sys.argv[2] == "--generate": generate = True @@ -74,7 +74,7 @@ def determinize_text(input, denumberify_paths=[], remove_paths=[], ignore_patter def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_flags=[]): - args[0] = str(binpath / "benchmark" / args[0]) + args = [sys.argv[1]] + args expected_stdout = str(sourcepath / "reference" / expected_stdout) expected_stderr = str(sourcepath / "reference" / expected_stderr) result = subprocess.run(args=launcher_flags + args, stdout=subprocess.PIPE, From 5a3f4d3aff51a74179dc64edc7fb4b775fab4dcd Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 19 Apr 2023 13:13:29 +0200 Subject: [PATCH 028/583] handle windows newlines correctly --- benchmark/test/test_framework.py.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index e53a35c30a8..27424cc30b6 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -96,8 +96,8 @@ def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_fl result_stderr_processed = determinize_text(result.stderr.decode( ), ignore_patterns=version_patterns) expected_stdout_processed = determinize_text( - open(expected_stdout).read()) - expected_stderr_processed = determinize_text(open(expected_stderr).read( + open(expected_stdout, 'rU').read()) + expected_stderr_processed = determinize_text(open(expected_stderr, 'rU').read( ), ignore_patterns=version_patterns) failed = False if result_stdout_processed != expected_stdout_processed: From de15622efc78465b6acbf60a951910a67ce00d01 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 19 Apr 2023 13:16:42 +0200 Subject: [PATCH 029/583] fix SYCL warnings in output --- .gitlab-ci.yml | 6 +++--- dpcpp/get_info.cmake | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9d374d81eef..85683fc100c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -615,7 +615,7 @@ build/dpcpp/2022-1/cpu/release/static: BUILD_DPCPP: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "ON" - SYCL_DEVICE_TYPE: "CPU" + SYCL_DEVICE_FILTER: "CPU" SLURM_PARTITION: "cpu" SLURM_TIME: "2:00:00" # This job is not in exclusive mode @@ -634,7 +634,7 @@ build/dpcpp/igpu/release/shared: BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "ON" DPCPP_SINGLE_MODE: "ON" - SYCL_DEVICE_TYPE: "GPU" + SYCL_DEVICE_FILTER: "GPU" # TODO: Enable when debug shared library size issues are fixed # build/dpcpp/level_zero_igpu/debug/shared: @@ -666,7 +666,7 @@ build/dpcpp/dgpu/release/static: BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OF" DPCPP_SINGLE_MODE: "ON" - SYCL_DEVICE_TYPE: "GPU" + SYCL_DEVICE_FILTER: "GPU" build/dpcpp/level_zero_dgpu/release/shared: extends: diff --git a/dpcpp/get_info.cmake b/dpcpp/get_info.cmake index 36918a3a8c6..ee9c0398f3e 100644 --- a/dpcpp/get_info.cmake +++ b/dpcpp/get_info.cmake @@ -3,6 +3,5 @@ ginkgo_print_module_footer(${detailed_log} "DPCPP variables:") ginkgo_print_variable(${detailed_log} "GINKGO_DPCPP_FLAGS") ginkgo_print_variable(${detailed_log} "GINKGO_DPCPP_SINGLE_MODE") ginkgo_print_module_footer(${detailed_log} "DPCPP environment 
variables:") -ginkgo_print_env_variable(${detailed_log} "SYCL_DEVICE_TYPE") -ginkgo_print_env_variable(${detailed_log} "SYCL_BE") +ginkgo_print_env_variable(${detailed_log} "SYCL_DEVICE_FILTER") ginkgo_print_module_footer(${detailed_log} "") From f7051d1a2c23a142c03f04637d544093a2101590 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 20 Apr 2023 11:53:19 +0200 Subject: [PATCH 030/583] strip implementation-dependent demangled typenames --- .../test/reference/conversion.profile.stderr | 8 +- .../reference/preconditioner.profile.stderr | 32 +- .../reference/preconditioner.profile.stdout | 6 +- .../reference/preconditioner.simple.stdout | 6 +- .../test/reference/solver.profile.stderr | 688 +++++++++--------- .../test/reference/solver.profile.stdout | 15 +- benchmark/test/reference/solver.simple.stdout | 15 +- benchmark/test/reference/spmv.profile.stderr | 32 +- benchmark/test/test_framework.py.in | 28 +- 9 files changed, 414 insertions(+), 416 deletions(-) diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 6fc5cde206e..8ea580247d8 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -43,7 +43,7 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin copy(gko::matrix::Coo,gko::matrix::Csr) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free @@ -58,7 +58,7 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: end copy(gko::matrix::Coo,gko::matrix::Csr) +DEBUG: end copy() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -123,7 +123,7 @@ DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin free DEBUG: end free -DEBUG: begin copy(gko::matrix::Csr,gko::matrix::Coo) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin copy @@ -136,7 +136,7 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::convert_ptrs_to_idxs DEBUG: end components::convert_ptrs_to_idxs -DEBUG: end copy(gko::matrix::Csr,gko::matrix::Coo) +DEBUG: end copy() DEBUG: begin free DEBUG: end free DEBUG: begin free diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index 97341459e69..86ec044eb40 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -61,36 +61,36 @@ DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data Matrix is of size (125, 125) DEBUG: begin none -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin generate(gko::matrix::IdentityFactory) -DEBUG: end generate(gko::matrix::IdentityFactory) -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end copy() +DEBUG: begin generate() +DEBUG: end generate() +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin free DEBUG: end free -DEBUG: begin 
copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin generate(gko::matrix::IdentityFactory) -DEBUG: end generate(gko::matrix::IdentityFactory) -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end copy() +DEBUG: begin generate() +DEBUG: end generate() +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin free DEBUG: end free DEBUG: end none diff --git a/benchmark/test/reference/preconditioner.profile.stdout b/benchmark/test/reference/preconditioner.profile.stdout index c775fd61285..ba967989af4 100644 --- a/benchmark/test/reference/preconditioner.profile.stdout +++ b/benchmark/test/reference/preconditioner.profile.stdout @@ -7,7 +7,7 @@ "none": { "generate": { "components": { - "generate(gko::matrix::IdentityFactory)": 1.0, + "generate()": 1.0, "overhead": 1.0 }, "time": 1.0, @@ -15,8 +15,8 @@ }, "apply": { "components": { - "apply(gko::matrix::Identity)": 1.0, - "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "apply()": 1.0, + "copy()": 1.0, "dense::copy": 1.0, "overhead": 1.0 }, diff --git a/benchmark/test/reference/preconditioner.simple.stdout b/benchmark/test/reference/preconditioner.simple.stdout index 84100628d73..c47146a72e1 100644 --- a/benchmark/test/reference/preconditioner.simple.stdout +++ b/benchmark/test/reference/preconditioner.simple.stdout @@ -7,7 +7,7 @@ "none": { "generate": { "components": { - "generate(gko::matrix::IdentityFactory)": 1.0, + "generate()": 1.0, "overhead": 1.0 }, "time": 1.0, @@ -15,8 +15,8 @@ }, "apply": { "components": { - "apply(gko::matrix::Identity)": 1.0, - "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "apply()": 1.0, + "copy()": 1.0, "dense::copy": 1.0, "overhead": 1.0 }, diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index e50ab7f27b3..8aa04832601 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -55,12 +55,12 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end copy() Matrix is of size (125, 125) DEBUG: begin cg Running solver: cg @@ -73,19 +73,19 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin free DEBUG: end free -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin generate(gko::solver::Cg::Factory) -DEBUG: begin generate(gko::matrix::IdentityFactory) -DEBUG: end generate(gko::matrix::IdentityFactory) -DEBUG: end generate(gko::solver::Cg::Factory) -DEBUG: begin copy(gko::matrix::Identity,gko::matrix::Identity) -DEBUG: end copy(gko::matrix::Identity,gko::matrix::Identity) -DEBUG: begin apply(gko::solver::Cg) +DEBUG: end copy() +DEBUG: begin generate() +DEBUG: begin generate() +DEBUG: end generate() +DEBUG: end generate() +DEBUG: begin copy() +DEBUG: end copy() 
+DEBUG: begin apply() DEBUG: begin iteration DEBUG: begin allocate DEBUG: end allocate @@ -115,10 +115,10 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin cg::initialize DEBUG: end cg::initialize -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -131,232 +131,232 @@ DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end 
apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin 
csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch 
DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: end check() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -368,16 +368,16 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end iteration -DEBUG: end apply(gko::solver::Cg) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end apply() +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end copy() DEBUG: begin free DEBUG: end free -DEBUG: begin apply(gko::solver::Cg) +DEBUG: begin apply() DEBUG: begin iteration DEBUG: begin dense::fill DEBUG: end dense::fill @@ -385,10 +385,10 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin cg::initialize DEBUG: end cg::initialize -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -401,12 +401,12 @@ DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin allocate @@ -421,16 +421,16 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -447,32 +447,32 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin 
copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration @@ -489,16 +489,16 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -515,32 +515,32 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration @@ -557,16 +557,16 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -583,32 +583,32 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin 
check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration @@ -625,16 +625,16 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -651,32 +651,32 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration @@ -693,16 +693,16 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end 
advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -719,32 +719,32 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration @@ -761,16 +761,16 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -787,32 +787,32 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin 
dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration @@ -829,16 +829,16 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -855,32 +855,32 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration @@ -897,16 +897,16 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -923,14 +923,14 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: end check() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -942,27 +942,27 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end iteration -DEBUG: end apply(gko::solver::Cg) +DEBUG: end apply() DEBUG: begin free DEBUG: end free 
-DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end copy() +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end copy() DEBUG: begin free DEBUG: end free -DEBUG: begin generate(gko::solver::Cg::Factory) -DEBUG: begin generate(gko::matrix::IdentityFactory) -DEBUG: end generate(gko::matrix::IdentityFactory) -DEBUG: end generate(gko::solver::Cg::Factory) +DEBUG: begin generate() +DEBUG: begin generate() +DEBUG: end generate() +DEBUG: end generate() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -985,7 +985,7 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin apply(gko::solver::Cg) +DEBUG: begin apply() DEBUG: begin iteration DEBUG: begin allocate DEBUG: end allocate @@ -1015,10 +1015,10 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin cg::initialize DEBUG: end cg::initialize -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -1031,232 +1031,232 @@ DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin 
dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin 
apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end 
check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: end iteration DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: end check() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -1268,21 +1268,21 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end iteration -DEBUG: end apply(gko::solver::Cg) +DEBUG: end apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch diff --git a/benchmark/test/reference/solver.profile.stdout b/benchmark/test/reference/solver.profile.stdout index a61b432ca0d..f66daea1f30 100644 --- a/benchmark/test/reference/solver.profile.stdout +++ b/benchmark/test/reference/solver.profile.stdout @@ -15,32 +15,27 @@ "rhs_norm": 1.0, "generate": { "components": { - "generate(gko::solver::Cg::Factory)": 1.0, - "generate(gko::matrix::IdentityFactory)": 1.0, + "generate()": 1.0, "overhead": 1.0 }, "time": 1.0 }, "apply": { "components": { - "apply(gko::solver::Cg)": 1.0, + "apply()": 1.0, "iteration": 1.0, "allocate": 1.0, "dense::fill": 1.0, "cg::initialize": 1.0, - "advanced_apply(gko::matrix::Csr)": 1.0, + "advanced_apply()": 1.0, "csr::advanced_spmv": 1.0, "dense::compute_norm2_dispatch": 1.0, - "apply(gko::matrix::Identity)": 1.0, - "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "copy()": 1.0, "dense::copy": 1.0, "dense::compute_conj_dot_dispatch": 1.0, - "check(gko::stop::Combined)": 1.0, - "check(gko::stop::ResidualNorm)": 1.0, + "check()": 1.0, "residual_norm::residual_norm": 1.0, - "check(gko::stop::Iteration)": 1.0, "cg::step_1": 1.0, - "apply(gko::matrix::Csr)": 1.0, "csr::spmv": 1.0, "cg::step_2": 1.0, "free": 1.0, diff --git a/benchmark/test/reference/solver.simple.stdout b/benchmark/test/reference/solver.simple.stdout index 2e44c73fdfa..c6055339d67 100644 --- a/benchmark/test/reference/solver.simple.stdout +++ b/benchmark/test/reference/solver.simple.stdout @@ -15,8 +15,7 @@ 
"rhs_norm": 1.0, "generate": { "components": { - "generate(gko::solver::Cg::Factory)": 1.0, - "generate(gko::matrix::IdentityFactory)": 1.0, + "generate()": 1.0, "free": 1.0, "overhead": 1.0 }, @@ -24,24 +23,20 @@ }, "apply": { "components": { - "apply(gko::solver::Cg)": 1.0, + "apply()": 1.0, "iteration": 1.0, "allocate": 1.0, "dense::fill": 1.0, "cg::initialize": 1.0, - "advanced_apply(gko::matrix::Csr)": 1.0, + "advanced_apply()": 1.0, "csr::advanced_spmv": 1.0, "dense::compute_norm2_dispatch": 1.0, - "apply(gko::matrix::Identity)": 1.0, - "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "copy()": 1.0, "dense::copy": 1.0, "dense::compute_conj_dot_dispatch": 1.0, - "check(gko::stop::Combined)": 1.0, - "check(gko::stop::ResidualNorm)": 1.0, + "check()": 1.0, "residual_norm::residual_norm": 1.0, - "check(gko::stop::Iteration)": 1.0, "cg::step_1": 1.0, - "apply(gko::matrix::Csr)": 1.0, "csr::spmv": 1.0, "cg::step_2": 1.0, "free": 1.0, diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 3ddabd987ad..ea170aac1a8 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -53,12 +53,12 @@ DEBUG: end free DEBUG: begin free DEBUG: end free Matrix is of size (125, 125) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -67,10 +67,10 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin apply(gko::matrix::Coo) +DEBUG: begin apply() DEBUG: begin coo::spmv DEBUG: end coo::spmv -DEBUG: end apply(gko::matrix::Coo) +DEBUG: end apply() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -86,16 +86,16 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin apply(gko::matrix::Coo) +DEBUG: end copy() +DEBUG: begin apply() DEBUG: begin coo::spmv DEBUG: end coo::spmv -DEBUG: end apply(gko::matrix::Coo) +DEBUG: end apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -108,18 +108,18 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end copy() +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end copy() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -132,16 +132,16 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end 
copy(gko::matrix::Dense,gko::matrix::Dense) -DEBUG: begin apply(gko::matrix::Coo) +DEBUG: end copy() +DEBUG: begin apply() DEBUG: begin coo::spmv DEBUG: end coo::spmv -DEBUG: end apply(gko::matrix::Coo) +DEBUG: end apply() DEBUG: begin free DEBUG: end free DEBUG: begin free diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 27424cc30b6..e570458e4a4 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -58,11 +58,13 @@ def sanitize_text(lines): return "\n".join(reconstructed).split("\n") -def determinize_text(input, denumberify_paths=[], remove_paths=[], ignore_patterns=[]): - lines = input.split("\n") +def determinize_text(input, denumberify_paths=[], remove_paths=[], ignore_patterns=[], replace_patterns=[]): + lines = input.splitlines() output_lines = [] patterns = [re.compile(pattern) for pattern in ignore_patterns] for line in lines: + for pattern, replacement in replace_patterns: + line = re.sub(pattern, replacement, line) keep = True for pattern in patterns: if re.match(pattern, line): @@ -70,7 +72,10 @@ def determinize_text(input, denumberify_paths=[], remove_paths=[], ignore_patter break if keep: output_lines.append(line) - return sanitize_text(output_lines) + try: + return sanitize_text(output_lines) + except json.decoder.JSONDecodeError: + return output_lines def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_flags=[]): @@ -84,21 +89,24 @@ def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_fl version_patterns = [ " the .* module is", ] + profiler_hook_typename_patterns = [ + ("(apply|generate|check|copy|move)\([^())]*\)", "\\1()") + ] if generate: open(expected_stdout, "w").write("\n".join(determinize_text( - result.stdout.decode()))) + result.stdout.decode(), replace_patterns=profiler_hook_typename_patterns))) open(expected_stderr, "w").write("\n".join(determinize_text(result.stderr.decode( - ), ignore_patterns=version_patterns))) + ), ignore_patterns=version_patterns, replace_patterns=profiler_hook_typename_patterns))) print("GENERATED") return result_stdout_processed = determinize_text( - result.stdout.decode()) + result.stdout.decode(), replace_patterns=profiler_hook_typename_patterns) result_stderr_processed = determinize_text(result.stderr.decode( - ), ignore_patterns=version_patterns) + ), ignore_patterns=version_patterns, replace_patterns=profiler_hook_typename_patterns) expected_stdout_processed = determinize_text( - open(expected_stdout, 'rU').read()) - expected_stderr_processed = determinize_text(open(expected_stderr, 'rU').read( - ), ignore_patterns=version_patterns) + open(expected_stdout).read(), replace_patterns=profiler_hook_typename_patterns) + expected_stderr_processed = determinize_text(open(expected_stderr).read( + ), ignore_patterns=version_patterns, replace_patterns=profiler_hook_typename_patterns) failed = False if result_stdout_processed != expected_stdout_processed: print("FAIL: stdout differs") From c9a448a9ef8a036f26eee3c0ed881e02de948f5a Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 21 May 2023 11:04:24 +0200 Subject: [PATCH 031/583] strip more path-depentent output in test framework --- benchmark/test/test_framework.py.in | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index e570458e4a4..a0a7757b043 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ 
-8,6 +8,7 @@ import pathlib import sys sourcepath = pathlib.Path("@CMAKE_CURRENT_SOURCE_DIR@") binpath = pathlib.Path("@PROJECT_BINARY_DIR@") +projectroot = "@PROJECT_SOURCE_DIR@" generate = False if len(sys.argv) > 2 and sys.argv[2] == "--generate": generate = True @@ -15,11 +16,14 @@ if len(sys.argv) > 2 and sys.argv[2] == "--generate": denumberify_paths = ["time", "bandwidth", "flops", "components", "residual_norm", "rhs_norm", "max_relative_norm2"] +empty_string_paths = ["error"] empty_array_paths = ["recurrent_residuals", "true_residuals", "implicit_residuals", "iteration_timestamps"] def sanitize_json_single(key, value, sanitize_all): + if key in empty_string_paths and isinstance(value, str): + return "" if key in denumberify_paths and isinstance(value, float): return 1.0 if key in denumberify_paths and isinstance(value, typing.Dict): @@ -63,6 +67,7 @@ def determinize_text(input, denumberify_paths=[], remove_paths=[], ignore_patter output_lines = [] patterns = [re.compile(pattern) for pattern in ignore_patterns] for line in lines: + line = line.replace(projectroot, "ginkgo") for pattern, replacement in replace_patterns: line = re.sub(pattern, replacement, line) keep = True @@ -72,6 +77,8 @@ def determinize_text(input, denumberify_paths=[], remove_paths=[], ignore_patter break if keep: output_lines.append(line) + if output_lines[-1] != "": + output_lines.append("") try: return sanitize_text(output_lines) except json.decoder.JSONDecodeError: @@ -89,24 +96,25 @@ def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_fl version_patterns = [ " the .* module is", ] - profiler_hook_typename_patterns = [ - ("(apply|generate|check|copy|move)\([^())]*\)", "\\1()") + typename_patterns = [ + ("(apply|generate|check|copy|move)\([^())]*\)", "\\1()"), + ("Operation .* does not support [^\"]*", "Operation does not support") ] if generate: open(expected_stdout, "w").write("\n".join(determinize_text( - result.stdout.decode(), replace_patterns=profiler_hook_typename_patterns))) + result.stdout.decode(), replace_patterns=typename_patterns))) open(expected_stderr, "w").write("\n".join(determinize_text(result.stderr.decode( - ), ignore_patterns=version_patterns, replace_patterns=profiler_hook_typename_patterns))) + ), ignore_patterns=version_patterns, replace_patterns=typename_patterns))) print("GENERATED") return result_stdout_processed = determinize_text( - result.stdout.decode(), replace_patterns=profiler_hook_typename_patterns) + result.stdout.decode(), replace_patterns=typename_patterns) result_stderr_processed = determinize_text(result.stderr.decode( - ), ignore_patterns=version_patterns, replace_patterns=profiler_hook_typename_patterns) + ), ignore_patterns=version_patterns, replace_patterns=typename_patterns) expected_stdout_processed = determinize_text( - open(expected_stdout).read(), replace_patterns=profiler_hook_typename_patterns) + open(expected_stdout).read(), replace_patterns=typename_patterns) expected_stderr_processed = determinize_text(open(expected_stderr).read( - ), ignore_patterns=version_patterns, replace_patterns=profiler_hook_typename_patterns) + ), ignore_patterns=version_patterns, replace_patterns=typename_patterns) failed = False if result_stdout_processed != expected_stdout_processed: print("FAIL: stdout differs") From 84c4871b75c94d5b0aee718928b3bbf2a85eae6a Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 21 May 2023 11:04:43 +0200 Subject: [PATCH 032/583] update benchmark test outputs --- benchmark/test/reference/blas.profile.stderr | 4 +- 
benchmark/test/reference/blas.simple.stderr | 4 +- .../test/reference/conversion.all.stderr | 1866 ++++++++++++++++- .../test/reference/conversion.all.stdout | 74 +- .../test/reference/conversion.profile.stderr | 102 +- .../test/reference/conversion.profile.stdout | 19 +- .../test/reference/conversion.simple.stderr | 46 +- .../test/reference/conversion.simple.stdout | 19 +- .../reference/matrix_statistics.simple.stderr | 4 +- .../reference/preconditioner.profile.stderr | 10 +- .../reference/preconditioner.simple.stderr | 10 +- .../test/reference/solver.profile.stderr | 214 +- benchmark/test/reference/solver.simple.stderr | 4 +- .../test/reference/sparse_blas.profile.stderr | 4 +- .../test/reference/sparse_blas.simple.stderr | 4 +- benchmark/test/reference/spmv.profile.stderr | 4 +- benchmark/test/reference/spmv.simple.stderr | 4 +- 17 files changed, 2130 insertions(+), 262 deletions(-) diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index 1fb7d5b93bc..16a86bd4c94 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/blas.simple.stderr b/benchmark/test/reference/blas.simple.stderr index e9b186e1353..72a2fbb9b90 100644 --- a/benchmark/test/reference/blas.simple.stderr +++ b/benchmark/test/reference/blas.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr index dbc5720527c..a21a0254200 100644 --- a/benchmark/test/reference/conversion.all.stderr +++ b/benchmark/test/reference/conversion.all.stderr @@ -1,26 +1,1856 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr,ell,sellp,hybrid +Benchmarking conversions. 
Running test case { "size": 100, "stencil": "7pt", - "conversion": {} -} -Matrix is of size (125, 125), 725 - Running conversion: coo-read - Running conversion: coo-csr - Running conversion: csr-read - Running conversion: csr-coo - Running conversion: csr-ell - Running conversion: csr-sellp - Running conversion: csr-hybrid - Running conversion: ell-read - Running conversion: ell-csr - Running conversion: sellp-read - Running conversion: sellp-csr - Running conversion: hybrid-read - Running conversion: hybrid-csr + "conversions": {} +} +Matrix is of size (125, 125) +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + } + } + } +] +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + 
"repetitions": 10, + "completed": true + } + } + } +] +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + } + } + } +] +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Error when 
processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": 
false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + } + } + } +] +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + 
"csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + }, + "sellp-hybrid": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 
10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + }, + "sellp-hybrid": { + "completed": false, + "error": "" + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + }, + "sellp-hybrid": { + "completed": false, + "error": "" + }, + "hybrid-coo": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + }, + "sellp-hybrid": { + "completed": false, + "error": "" + }, + "hybrid-coo": { + "completed": false, + "error": "" + } + } + } +] +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": 
{ + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + }, + "sellp-hybrid": { + "completed": false, + "error": "" + }, + "hybrid-coo": { + "completed": false, + "error": "" + }, + "hybrid-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + }, + "sellp-hybrid": { + "completed": false, + "error": "" + }, + "hybrid-coo": { + "completed": false, + "error": "" + }, + "hybrid-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "hybrid-ell": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + }, + "sellp-hybrid": { + "completed": false, + "error": "" + }, + "hybrid-coo": { + 
"completed": false, + "error": "" + }, + "hybrid-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "hybrid-ell": { + "completed": false, + "error": "" + } + } + } +] +Error when processing test case +{ + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + }, + "sellp-hybrid": { + "completed": false, + "error": "" + }, + "hybrid-coo": { + "completed": false, + "error": "" + }, + "hybrid-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "hybrid-ell": { + "completed": false, + "error": "" + }, + "hybrid-sellp": { + "completed": false, + "error": "" + } + } +} +what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-ell": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-sellp": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-hybrid": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-coo": { + "completed": false, + "error": "" + }, + "ell-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" + }, + "sellp-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp-ell": { + "completed": false, + "error": "" + }, + "sellp-hybrid": { + "completed": false, + "error": "" + }, + "hybrid-coo": { + "completed": false, + "error": "" + }, + "hybrid-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "hybrid-ell": { + "completed": false, + "error": "" + }, + "hybrid-sellp": { + "completed": false, + "error": "" + } + } + } +] diff --git a/benchmark/test/reference/conversion.all.stdout b/benchmark/test/reference/conversion.all.stdout index c4b657a42c4..cb53bb81a6c 100644 --- a/benchmark/test/reference/conversion.all.stdout +++ b/benchmark/test/reference/conversion.all.stdout @@ -1,23 +1,25 @@ [ { - "size": 100, + "size": 125, "stencil": "7pt", - "conversion": { - "coo-read": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, + "conversions": { "coo-csr": { 
"time": 1.0, "repetitions": 10, "completed": true }, - "csr-read": { - "time": 1.0, - "repetitions": 10, - "completed": true + "coo-ell": { + "completed": false, + "error": "" + }, + "coo-sellp": { + "completed": false, + "error": "" + }, + "coo-hybrid": { + "completed": false, + "error": "" }, "csr-coo": { "time": 1.0, @@ -39,39 +41,57 @@ "repetitions": 10, "completed": true }, - "ell-read": { - "time": 1.0, - "repetitions": 10, - "completed": true + "ell-coo": { + "completed": false, + "error": "" }, "ell-csr": { "time": 1.0, "repetitions": 10, "completed": true }, - "sellp-read": { - "time": 1.0, - "repetitions": 10, - "completed": true + "ell-sellp": { + "completed": false, + "error": "" + }, + "ell-hybrid": { + "completed": false, + "error": "" + }, + "sellp-coo": { + "completed": false, + "error": "" }, "sellp-csr": { "time": 1.0, "repetitions": 10, "completed": true }, - "hybrid-read": { - "time": 1.0, - "repetitions": 10, - "completed": true + "sellp-ell": { + "completed": false, + "error": "" + }, + "sellp-hybrid": { + "completed": false, + "error": "" + }, + "hybrid-coo": { + "completed": false, + "error": "" }, "hybrid-csr": { "time": 1.0, "repetitions": 10, "completed": true + }, + "hybrid-ell": { + "completed": false, + "error": "" + }, + "hybrid-sellp": { + "completed": false, + "error": "" } - }, - "rows": 125, - "cols": 125, - "nonzeros": 725 + } } ] diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 8ea580247d8..e772752ea4a 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -1,19 +1,18 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo,csr +Benchmarking conversions. 
Running test case { "size": 100, "stencil": "7pt", - "conversion": {} + "conversions": {} } -Matrix is of size (125, 125), 725 -DEBUG: begin stencil(100,7pt) - Running conversion: coo-read -DEBUG: begin coo-read +Matrix is of size (125, 125) +DEBUG: begin stencil(125,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -22,17 +21,13 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end coo-read - Running conversion: coo-csr DEBUG: begin coo-csr DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -41,8 +36,12 @@ DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate @@ -50,10 +49,14 @@ DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin free +DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin free +DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs @@ -65,15 +68,27 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: end coo-csr +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end coo-csr - Running conversion: csr-read -DEBUG: begin csr-read DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array @@ -94,46 +109,32 @@ DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin free DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end csr-read - Running conversion: csr-coo DEBUG: begin csr-coo DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin components::convert_idxs_to_ptrs -DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin free +DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin free +DEBUG: end free DEBUG: begin components::convert_ptrs_to_idxs DEBUG: end components::convert_ptrs_to_idxs DEBUG: end copy() @@ -143,11 +144,30 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: end csr-coo +Current state: +[ + { + "size": 125, 
+ "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 1, + "completed": true + }, + "csr-coo": { + "time": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end csr-coo -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(125,7pt) diff --git a/benchmark/test/reference/conversion.profile.stdout b/benchmark/test/reference/conversion.profile.stdout index b29815f6c17..3e76bc26934 100644 --- a/benchmark/test/reference/conversion.profile.stdout +++ b/benchmark/test/reference/conversion.profile.stdout @@ -1,32 +1,19 @@ [ { - "size": 100, + "size": 125, "stencil": "7pt", - "conversion": { - "coo-read": { - "time": 1.0, - "repetitions": 1, - "completed": true - }, + "conversions": { "coo-csr": { "time": 1.0, "repetitions": 1, "completed": true }, - "csr-read": { - "time": 1.0, - "repetitions": 1, - "completed": true - }, "csr-coo": { "time": 1.0, "repetitions": 1, "completed": true } - }, - "rows": 125, - "cols": 125, - "nonzeros": 725 + } } ] diff --git a/benchmark/test/reference/conversion.simple.stderr b/benchmark/test/reference/conversion.simple.stderr index 1e4dbc4bd51..f044da61804 100644 --- a/benchmark/test/reference/conversion.simple.stderr +++ b/benchmark/test/reference/conversion.simple.stderr @@ -1,17 +1,47 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr +Benchmarking conversions. Running test case { "size": 100, "stencil": "7pt", - "conversion": {} + "conversions": {} } -Matrix is of size (125, 125), 725 - Running conversion: coo-read - Running conversion: coo-csr - Running conversion: csr-read - Running conversion: csr-coo +Matrix is of size (125, 125) +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] +Current state: +[ + { + "size": 125, + "stencil": "7pt", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/conversion.simple.stdout b/benchmark/test/reference/conversion.simple.stdout index 856f1330eea..9ecdd46f5e1 100644 --- a/benchmark/test/reference/conversion.simple.stdout +++ b/benchmark/test/reference/conversion.simple.stdout @@ -1,32 +1,19 @@ [ { - "size": 100, + "size": 125, "stencil": "7pt", - "conversion": { - "coo-read": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, + "conversions": { "coo-csr": { "time": 1.0, "repetitions": 10, "completed": true }, - "csr-read": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, "csr-coo": { "time": 1.0, "repetitions": 10, "completed": true } - }, - "rows": 125, - "cols": 125, - "nonzeros": 725 + } } ] diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr b/benchmark/test/reference/matrix_statistics.simple.stderr index e77cd5d413a..69d2bbf9098 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stderr +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 
(develop) Running test case { "size": 100, diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index 86ec044eb40..2bebc03be8d 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 @@ -103,7 +103,7 @@ Current state: "none": { "generate": { "components": { - "generate(gko::matrix::IdentityFactory)": 1.0, + "generate()": 1.0, "overhead": 1.0 }, "time": 1.0, @@ -111,8 +111,8 @@ Current state: }, "apply": { "components": { - "apply(gko::matrix::Identity)": 1.0, - "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "apply()": 1.0, + "copy()": 1.0, "dense::copy": 1.0, "overhead": 1.0 }, diff --git a/benchmark/test/reference/preconditioner.simple.stderr b/benchmark/test/reference/preconditioner.simple.stderr index 4a7ee9498d5..bfec4a697ee 100644 --- a/benchmark/test/reference/preconditioner.simple.stderr +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 @@ -20,7 +20,7 @@ Current state: "none": { "generate": { "components": { - "generate(gko::matrix::IdentityFactory)": 1.0, + "generate()": 1.0, "overhead": 1.0 }, "time": 1.0, @@ -28,8 +28,8 @@ Current state: }, "apply": { "components": { - "apply(gko::matrix::Identity)": 1.0, - "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, + "apply()": 1.0, + "copy()": 1.0, "dense::copy": 1.0, "overhead": 1.0 }, diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 8aa04832601..a601444163d 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 @@ -167,8 +167,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -179,6 +177,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -197,8 +197,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -209,6 +207,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -227,8 +227,6 @@ DEBUG: end 
copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -239,6 +237,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -257,8 +257,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -269,6 +267,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -287,8 +287,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -299,6 +297,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -317,8 +317,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -329,6 +327,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -347,8 +347,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -409,6 +407,16 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -447,16 +455,6 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -475,6 +473,16 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: 
end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -515,16 +523,6 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -543,6 +541,16 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -583,16 +591,6 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -611,6 +609,16 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -651,16 +659,6 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -679,6 +677,16 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -719,16 +727,6 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -747,6 +745,16 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin 
dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -787,16 +795,6 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -815,6 +813,16 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -855,16 +863,6 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -883,8 +881,14 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_norm2_dispatch +DEBUG: end dense::compute_norm2_dispatch +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: end check() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -923,14 +927,6 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: end check() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -1067,8 +1063,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -1079,6 +1073,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -1097,8 +1093,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch 
@@ -1109,6 +1103,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -1127,8 +1123,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -1139,6 +1133,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -1157,8 +1153,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -1169,6 +1163,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -1187,8 +1183,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -1199,6 +1193,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -1217,8 +1213,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch @@ -1229,6 +1223,8 @@ DEBUG: end check() DEBUG: begin check() DEBUG: end check() DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 DEBUG: begin apply() @@ -1247,8 +1243,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration DEBUG: begin check() DEBUG: begin check() DEBUG: begin dense::compute_norm2_dispatch diff --git a/benchmark/test/reference/solver.simple.stderr b/benchmark/test/reference/solver.simple.stderr index dad85f1c921..936046c4949 100644 --- a/benchmark/test/reference/solver.simple.stderr +++ b/benchmark/test/reference/solver.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index 02dfdfdacfd..3dee884861e 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand 
sides is 42 diff --git a/benchmark/test/reference/sparse_blas.simple.stderr b/benchmark/test/reference/sparse_blas.simple.stderr index a813994e739..e6e0884e267 100644 --- a/benchmark/test/reference/sparse_blas.simple.stderr +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index ea170aac1a8..735e4bf5d23 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr index 8a2ebe9fe15..1bb4472bce6 100644 --- a/benchmark/test/reference/spmv.simple.stderr +++ b/benchmark/test/reference/spmv.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 From eba1b0b37253a34d1338bb50f20ce910007720fc Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 21 May 2023 12:12:04 +0200 Subject: [PATCH 033/583] more strict path removal --- .../test/reference/conversion.all.stderr | 24 +++++++++---------- benchmark/test/test_framework.py.in | 4 +--- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr index a21a0254200..d6aab6a0331 100644 --- a/benchmark/test/reference/conversion.all.stderr +++ b/benchmark/test/reference/conversion.all.stderr @@ -42,7 +42,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -81,7 +81,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -128,7 +128,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -357,7 +357,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -520,7 +520,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -641,7 +641,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -770,7 +770,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -984,7 +984,7 @@ Error when processing test case } } } -what(): 
ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -1139,7 +1139,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -1302,7 +1302,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -1567,7 +1567,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { @@ -1756,7 +1756,7 @@ Error when processing test case } } } -what(): ginkgo/include/ginkgo/core/base/utils_helper.hpp:368: Operation does not support +what(): Current state: [ { diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index a0a7757b043..2d22f11ac4f 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -8,7 +8,6 @@ import pathlib import sys sourcepath = pathlib.Path("@CMAKE_CURRENT_SOURCE_DIR@") binpath = pathlib.Path("@PROJECT_BINARY_DIR@") -projectroot = "@PROJECT_SOURCE_DIR@" generate = False if len(sys.argv) > 2 and sys.argv[2] == "--generate": generate = True @@ -67,7 +66,6 @@ def determinize_text(input, denumberify_paths=[], remove_paths=[], ignore_patter output_lines = [] patterns = [re.compile(pattern) for pattern in ignore_patterns] for line in lines: - line = line.replace(projectroot, "ginkgo") for pattern, replacement in replace_patterns: line = re.sub(pattern, replacement, line) keep = True @@ -98,7 +96,7 @@ def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_fl ] typename_patterns = [ ("(apply|generate|check|copy|move)\([^())]*\)", "\\1()"), - ("Operation .* does not support [^\"]*", "Operation does not support") + ("what\\(\\): .*", "what(): ") ] if generate: open(expected_stdout, "w").write("\n".join(determinize_text( From 041e2740f58b1b6f0dff07aecd05312a3672b7a6 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 22 May 2023 13:56:13 +0200 Subject: [PATCH 034/583] update distributed outputs --- .../distributed_solver.profile.stderr | 1114 ++++---- .../distributed_solver.profile.stdout | 17 +- .../distributed_solver.simple.stderr | 4 +- .../distributed_solver.simple.stdout | 17 +- .../multi_vector_distributed.profile.stderr | 808 ++++++ .../multi_vector_distributed.simple.stderr | 14 +- .../reference/spmv_distributed.profile.stderr | 2380 +++++++++++++++++ .../reference/spmv_distributed.profile.stdout | 4 +- .../reference/spmv_distributed.simple.stderr | 8 +- .../reference/spmv_distributed.simple.stdout | 4 +- 10 files changed, 3764 insertions(+), 606 deletions(-) diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index 64b09a754c3..e0ddd10ab54 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 @@ -56,7 +56,7 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin copy(gko::matrix::Csr,gko::matrix::Csr) 
+DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy DEBUG: begin copy @@ -65,12 +65,12 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: end copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: end copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy DEBUG: begin copy @@ -79,7 +79,7 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: end copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: end copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::fill @@ -206,12 +206,12 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: end copy() Matrix is of size (125, 125) DEBUG: begin cg Running solver: cg @@ -226,19 +226,19 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin free DEBUG: end free -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: begin generate(gko::solver::Cg::Factory) -DEBUG: begin generate(gko::matrix::IdentityFactory) -DEBUG: end generate(gko::matrix::IdentityFactory) -DEBUG: end generate(gko::solver::Cg::Factory) -DEBUG: begin copy(gko::matrix::Identity,gko::matrix::Identity) -DEBUG: end copy(gko::matrix::Identity,gko::matrix::Identity) -DEBUG: begin apply(gko::solver::Cg) +DEBUG: end copy() +DEBUG: begin generate() +DEBUG: begin generate() +DEBUG: end generate() +DEBUG: end generate() +DEBUG: begin copy() +DEBUG: end copy() +DEBUG: begin apply() DEBUG: begin iteration DEBUG: begin allocate DEBUG: end allocate @@ -268,18 +268,18 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin cg::initialize DEBUG: end cg::initialize -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -294,304 +294,302 @@ DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end 
dense::compute_conj_dot_dispatch -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: 
begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end 
apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather 
DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: end 
check(gko::stop::Combined) +DEBUG: end check() +DEBUG: end check() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -603,16 +601,16 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end iteration -DEBUG: end apply(gko::solver::Cg) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end apply() +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end copy() DEBUG: begin free DEBUG: end free -DEBUG: begin apply(gko::solver::Cg) +DEBUG: begin apply() DEBUG: begin iteration DEBUG: begin dense::fill DEBUG: end dense::fill @@ -620,18 +618,18 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin cg::initialize DEBUG: end cg::initialize -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -646,14 +644,26 @@ DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -668,24 +678,24 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() 
+DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -704,44 +714,44 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -758,24 +768,24 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end 
csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -794,44 +804,44 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -848,24 +858,24 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end 
advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -884,44 +894,44 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -938,24 +948,24 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end 
advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -974,44 +984,44 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -1028,24 +1038,24 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end 
allocate DEBUG: begin dense::compute_squared_norm2 @@ -1064,44 +1074,44 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -1118,24 +1128,24 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -1154,44 +1164,44 @@ 
DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: end iteration DEBUG: begin iteration DEBUG: begin allocate @@ -1208,24 +1218,24 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -1244,46 +1254,42 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: 
begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration +DEBUG: begin check() +DEBUG: begin check() +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin residual_norm::residual_norm +DEBUG: end residual_norm::residual_norm +DEBUG: end check() +DEBUG: end check() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -1298,24 +1304,24 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end copy() +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -1334,16 +1340,6 @@ DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin 
check(gko::stop::ResidualNorm) -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: end check(gko::stop::Combined) DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -1355,27 +1351,27 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end iteration -DEBUG: end apply(gko::solver::Cg) +DEBUG: end apply() DEBUG: begin free DEBUG: end free -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end copy() +DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: end copy() DEBUG: begin free DEBUG: end free -DEBUG: begin generate(gko::solver::Cg::Factory) -DEBUG: begin generate(gko::matrix::IdentityFactory) -DEBUG: end generate(gko::matrix::IdentityFactory) -DEBUG: end generate(gko::solver::Cg::Factory) +DEBUG: begin generate() +DEBUG: begin generate() +DEBUG: end generate() +DEBUG: end generate() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -1398,7 +1394,7 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin apply(gko::solver::Cg) +DEBUG: begin apply() DEBUG: begin iteration DEBUG: begin allocate DEBUG: end allocate @@ -1428,18 +1424,18 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin cg::initialize DEBUG: end cg::initialize -DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: begin advanced_apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end advanced_apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end advanced_apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end advanced_apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -1454,304 +1450,302 @@ DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin 
dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin 
dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin 
apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: 
begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: begin check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Iteration) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: begin check() +DEBUG: end check() +DEBUG: end check() +DEBUG: end iteration +DEBUG: begin iteration DEBUG: begin cg::step_1 DEBUG: end cg::step_1 -DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather -DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin apply() DEBUG: begin csr::spmv DEBUG: end csr::spmv -DEBUG: end apply(gko::matrix::Csr) -DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: end apply() +DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply(gko::matrix::Csr) -DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: end advanced_apply() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch DEBUG: begin cg::step_2 DEBUG: end cg::step_2 -DEBUG: begin apply(gko::matrix::Identity) -DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply() +DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) -DEBUG: end apply(gko::matrix::Identity) +DEBUG: end copy() +DEBUG: end apply() DEBUG: begin dense::compute_conj_dot_dispatch DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin check(gko::stop::Combined) -DEBUG: begin check(gko::stop::ResidualNorm) +DEBUG: begin check() +DEBUG: begin check() DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm -DEBUG: end check(gko::stop::ResidualNorm) -DEBUG: end check(gko::stop::Combined) +DEBUG: end check() +DEBUG: end check() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -1763,29 +1757,29 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end iteration -DEBUG: end apply(gko::solver::Cg) 
+DEBUG: end apply()
 DEBUG: begin allocate
 DEBUG: end allocate
 DEBUG: begin allocate
 DEBUG: end allocate
-DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector)
+DEBUG: begin copy()
 DEBUG: begin allocate
 DEBUG: end allocate
 DEBUG: begin dense::copy
 DEBUG: end dense::copy
-DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector)
-DEBUG: begin advanced_apply(gko::experimental::distributed::Matrix)
+DEBUG: end copy()
+DEBUG: begin advanced_apply()
 DEBUG: begin dense::row_gather
 DEBUG: end dense::row_gather
-DEBUG: begin advanced_apply(gko::matrix::Csr)
+DEBUG: begin advanced_apply()
 DEBUG: begin csr::advanced_spmv
 DEBUG: end csr::advanced_spmv
-DEBUG: end advanced_apply(gko::matrix::Csr)
-DEBUG: begin advanced_apply(gko::matrix::Csr)
+DEBUG: end advanced_apply()
+DEBUG: begin advanced_apply()
 DEBUG: begin csr::advanced_spmv
 DEBUG: end csr::advanced_spmv
-DEBUG: end advanced_apply(gko::matrix::Csr)
-DEBUG: end advanced_apply(gko::experimental::distributed::Matrix)
+DEBUG: end advanced_apply()
+DEBUG: end advanced_apply()
 DEBUG: begin allocate
 DEBUG: end allocate
 DEBUG: begin dense::compute_squared_norm2
diff --git a/benchmark/test/reference/distributed_solver.profile.stdout b/benchmark/test/reference/distributed_solver.profile.stdout
index 16dc6741930..a31b88ff582 100644
--- a/benchmark/test/reference/distributed_solver.profile.stdout
+++ b/benchmark/test/reference/distributed_solver.profile.stdout
@@ -16,36 +16,29 @@
 "rhs_norm": 1.0,
 "generate": {
 "components": {
- "generate(gko::solver::Cg::Factory)": 1.0,
- "generate(gko::matrix::IdentityFactory)": 1.0,
+ "generate()": 1.0,
 "overhead": 1.0
 },
 "time": 1.0
 },
 "apply": {
 "components": {
- "apply(gko::solver::Cg)": 1.0,
+ "apply()": 1.0,
 "iteration": 1.0,
 "allocate": 1.0,
 "dense::fill": 1.0,
 "cg::initialize": 1.0,
- "advanced_apply(gko::experimental::distributed::Matrix)": 1.0,
+ "advanced_apply()": 1.0,
 "dense::row_gather": 1.0,
- "advanced_apply(gko::matrix::Csr)": 1.0,
 "csr::advanced_spmv": 1.0,
 "dense::compute_squared_norm2": 1.0,
 "dense::compute_sqrt": 1.0,
- "apply(gko::matrix::Identity)": 1.0,
- "copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector)": 1.0,
+ "copy()": 1.0,
 "dense::copy": 1.0,
 "dense::compute_conj_dot_dispatch": 1.0,
- "check(gko::stop::Combined)": 1.0,
- "check(gko::stop::ResidualNorm)": 1.0,
+ "check()": 1.0,
 "residual_norm::residual_norm": 1.0,
- "check(gko::stop::Iteration)": 1.0,
 "cg::step_1": 1.0,
- "apply(gko::experimental::distributed::Matrix)": 1.0,
- "apply(gko::matrix::Csr)": 1.0,
 "csr::spmv": 1.0,
 "cg::step_2": 1.0,
 "free": 1.0,
diff --git a/benchmark/test/reference/distributed_solver.simple.stderr b/benchmark/test/reference/distributed_solver.simple.stderr
index 7800bb0b97e..6a5dab5d844 100644
--- a/benchmark/test/reference/distributed_solver.simple.stderr
+++ b/benchmark/test/reference/distributed_solver.simple.stderr
@@ -1,5 +1,5 @@
-This is Ginkgo 1.5.0 (develop)
- running with core module 1.5.0 (develop)
+This is Ginkgo 1.6.0 (develop)
+ running with core module 1.6.0 (develop)
 Running on reference(0)
 Running with 2 warm iterations and 1 running iterations
 The random seed for right hand sides is 42
diff --git a/benchmark/test/reference/distributed_solver.simple.stdout b/benchmark/test/reference/distributed_solver.simple.stdout
index 96ef102f8b8..54d7233ba77 100644
--- a/benchmark/test/reference/distributed_solver.simple.stdout
+++ b/benchmark/test/reference/distributed_solver.simple.stdout
@@ -16,8
+16,7 @@ "rhs_norm": 1.0, "generate": { "components": { - "generate(gko::solver::Cg::Factory)": 1.0, - "generate(gko::matrix::IdentityFactory)": 1.0, + "generate()": 1.0, "free": 1.0, "overhead": 1.0 }, @@ -25,28 +24,22 @@ }, "apply": { "components": { - "apply(gko::solver::Cg)": 1.0, + "apply()": 1.0, "iteration": 1.0, "allocate": 1.0, "dense::fill": 1.0, "cg::initialize": 1.0, - "advanced_apply(gko::experimental::distributed::Matrix)": 1.0, + "advanced_apply()": 1.0, "dense::row_gather": 1.0, - "advanced_apply(gko::matrix::Csr)": 1.0, "csr::advanced_spmv": 1.0, "dense::compute_squared_norm2": 1.0, "dense::compute_sqrt": 1.0, - "apply(gko::matrix::Identity)": 1.0, - "copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector)": 1.0, + "copy()": 1.0, "dense::copy": 1.0, "dense::compute_conj_dot_dispatch": 1.0, - "check(gko::stop::Combined)": 1.0, - "check(gko::stop::ResidualNorm)": 1.0, + "check()": 1.0, "residual_norm::residual_norm": 1.0, - "check(gko::stop::Iteration)": 1.0, "cg::step_1": 1.0, - "apply(gko::experimental::distributed::Matrix)": 1.0, - "apply(gko::matrix::Csr)": 1.0, "csr::spmv": 1.0, "cg::step_2": 1.0, "free": 1.0, diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr index e69de29bb2d..3e650323bfa 100644 --- a/benchmark/test/reference/multi_vector_distributed.profile.stderr +++ b/benchmark/test/reference/multi_vector_distributed.profile.stderr @@ -0,0 +1,808 @@ +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scalRunning test case +{ + "n": DEBUG: begin n = 100 +DEBUG: begin copy +100, + "blaDEBUG: begin allocate +DEBUG: end s": {} +} +DEBUG: begin n = 100 +DEBUG: begin allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +copy +DEBUG: begin allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate + +DEBUG: begin allocate +DEBUG: end DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguousDEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_starting_indicesDEBUG: begin n = 100 +DEBUG: begin copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate + +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array + +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin free 
+DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin DEBUG: begin components::fill_array +DEBUG: end components::fill_arrayallocate +DEBUG: end allocate +allocate +DEBUG: end allocate +DEBUG: begin components::fill_array + +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end DEBUG: begin free +DEBUG: end free +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end DEBUG: end DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocatecomponents::fill_array +allocate +DEBUG: begin +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin freeDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array + +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin freeDEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocateDEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end free +DEBUG: end free +DEBUG: begin +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indicescopy 
+DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate + +DEBUG: begin copy +DEBUG: end copyDEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free + +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_arrayallocate +DEBUG: end allocate +DEBUG: begin DEBUG: begin dense::fill +DEBUG: end +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::copy +DEBUG: end dense::copy +dense::fill +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end copy +DEBUG: begin axpy +DEBUG: begin allocate +DEBUG: end copy +Current state: +[ + DEBUG: begin axpy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin { + "n": 10allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin 0, + DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end "blas": { + DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array + "copy": { + components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indicesDEBUG: end 
components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free "ti +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free + +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocateme": 0.000013, + DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end allocate +DEBUG: begin free +free +DEBUG: begin free +DEBUG: end free + "flopDEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin s": 7692307.692307693, + allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free + "DEBUG: begin allocate +DEBUG: end allocate + +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin bandwidth":DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array 1230allocate +DEBUG: end allocate +DEBUG: begin allocate + +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end 76923.076923DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices09, + components::fill_array +DEBUG: begin components::fill_array +DEBUG: end +DEBUG: begin copy +DEBUG: end copy +components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy + "rcomponents::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copyDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end epetitions": 1, + "completed": t +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin freedense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +rue + +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled + } + free +DEBUG: begin dense::fill +DEBUG: end dense::fill + } + DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill + } +] +DEBUG: begin axpy +DEBUG: begin allocate +DEBUG: 
end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end axpy +DEBUG: begin DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end axpy +DEBUG: begin scal +scal +DEBUG: begin allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end axpy +Current state: +[ + { + DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate + "n": 100, + allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end DEBUG: end allocate +DEBUG: begin 
allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate "blasallocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array + +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end ": { + DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_arraycomponents::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array + "co +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end py": {partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy + free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate + "tiallocate +DEBUG: begin free +DEBUG: end free +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +me": 0.000013, + DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end +DEBUG: begin dense::fill +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill + "flops":DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::scale +DEBUG: end dense::scale + 7692307.692307693, + dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::scale +DEBUG: end dense::scale + "bandwidth": 123076923.07692309, + "repetitions": 1, + "completed": true + }, + "axpy": { + "time": 0.000017, + "flops": 11764705.88235294, + "bandwidth": 141176470.5882353, + "repetitions": 1, + "completed": true + } + } + } +] +DEBUG: begin scal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin 
dense::scale +DEBUG: end dense::scale +DEBUG: begin freeDEBUG: begin free + +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end scal +DEBUG: end n = 100 scal +Current state: +[ + { + "n":scal +DEBUG: end n = 100 + + 100, + "blas": { + "copy": { + "time": 0.000013, + "flops": 7692307.692307693, + "bandwidth": 123076923.07692309, + "repetitions": 1, + "completed": true + }, + "axpy": { + "time": 0.000017, + "flops": 11764705.88235294, + "bandwidth": 141176470.5882353, + "repetitions": 1, + "completed": true + }, + "scal": { + "time": 0.000007, + "flops": 14285714.285714285, + "bandwidth": 228571428.57142857, + "repetitions": 1, + "completed": true + } + } + } +] +DEBUG: end n = 100 diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stderr b/benchmark/test/reference/multi_vector_distributed.simple.stderr index 23f3554e9c4..72a2fbb9b90 100644 --- a/benchmark/test/reference/multi_vector_distributed.simple.stderr +++ b/benchmark/test/reference/multi_vector_distributed.simple.stderr @@ -1,15 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) -Running on reference(0) -Running with 2 warm iterations and 10 running iterations -The random seed for right hand sides is 42 -The operations are copy,axpy,scalThis is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) -Running on reference(0) -Running with 2 warm iterations and 10 running iterations -The random seed for right hand sides is 42 -The operations are copy,axpy,scalThis is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr index e69de29bb2d..1ce62b48dc2 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stderr +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -0,0 +1,2380 @@ +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The formats are [csr]x[csr] +The number of right hand sides is 1 +DEBUG: begin stencil(100,7pt,stencil) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin stencil(100,7pt,stencil) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate + +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end Running test case +{ + DEBUG: end allocate +DEBUG: begin components::fill_array +allocate +DEBUG: begin components::fill_array + "size":DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end 
components::fill_array +DEBUG: begin components::fill_array 100, + DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin "stencil": +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end DEBUG: begin copy +DEBUG: end copy +"7pt", + "comm_copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +pattern": "stallocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin encil", + "spmv": {} +} + +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +dense::fill +DEBUG: begin dense::fill +DEBUG: begin stencil(100,7pt,stencil) +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: begin allocate +DEBUG: end allocate +free +DEBUG: begin free +DEBUG: end free +DEBUG: begin partition::build_ranges_from_global_size + +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end partition::build_ranges_from_global_size + +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_sizeDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocateDEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array + +DEBUG: end allocate +DEBUG: begin +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end 
components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin DEBUG: begin allocate +DEBUG: end components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguouscomponents::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array + +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copyDEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copyDEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin components::aos_to_soa +DEBUG: end +DEBUG: end copy +DEBUG: begin DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fillcomponents::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: begin dense::fill +allocate +DEBUG: end allocate +DEBUG: end free +DEBUG: begin free +DEBUG: end DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_dataDEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fillfree +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free + +DEBUG: begin free +DEBUG: end free + +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: begin DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +free +freeDEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free + +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end DEBUG: begin copy() +copy() +DEBUG: begin allocate +DEBUG: end 
allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateallocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array + +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguousDEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguousallocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free + +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin DEBUG: begin copy() +components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_arraycopy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy + +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin copy()DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data + +DEBUG: begin copy +DEBUG: end copy +free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end 
dense::fill +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copyfree +DEBUG: end free +Matrix is of size (81DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal + +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy, 81) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate + +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_sizeDEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: end allocate +DEBUG: begin allocate +free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_arrayDEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +free +DEBUG: end free +DEBUG: begin allocatecomponents::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indicesDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end free +DEBUG: begin allocate 
+DEBUG: end allocate +DEBUG: begin free +DEBUG: end +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free + +DEBUG: end free +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate + +DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end freeDEBUG: end allocate +DEBUG: begin free +DEBUG: end free +allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa + +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocateDEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate + +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end freeDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end 
allocate +DEBUG: begin DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocateallocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateallocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free + +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocatefree +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocateDEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free + +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: begin free +DEBUG: end free +DEBUG: begin freeDEBUG: begin free +DEBUG: end free +DEBUG: begin free +allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate + +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocatefree +DEBUG: begin free +DEBUG: begin free +DEBUG: end free + +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: begin 
allocate +DEBUG: begin free +DEBUG: end free +DEBUG: end copy + +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: begin freeDEBUG: end free +DEBUG: begin freeDEBUG: begin free +DEBUG: end free +DEBUG: end free +DEBUG: begin free +DEBUG: end free + +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free + +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin freeDEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin apply() +free +DEBUG: end free +DEBUG: begin apply() +free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocateDEBUG: begin apply() + +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin allocate +DEBUG: end DEBUG: begin dense::row_gather +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin allocate +DEBUG: end allocate +allocate +DEBUG: end allocate +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin dense::row_gather +DEBUG: end DEBUG: end dense::row_gather +dense::row_gather +DEBUG: begin DEBUG: begin apply() +apply() +DEBUG: begin coo::spmv +DEBUG: end coo::spmv +DEBUG: begin apply() +DEBUG: begin coo::spmv +DEBUG: end coo::spmvDEBUG: begin coo::spmv +DEBUG: end DEBUG: end apply() + +DEBUG: end apply() 
+DEBUG: begin advanced_apply() +coo::spmv +DEBUG: begin advanced_apply() +DEBUG: begin coo::advanced_spmv +DEBUG: end coo::advanced_spmv +DEBUG: begin coo::advanced_spmv +DEBUG: end coo::advanced_spmv +DEBUG: end advanced_apply()DEBUG: end apply() +DEBUG: end advanced_apply() +DEBUG: end apply() +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: begin advanced_apply() +DEBUG: begin coo::advanced_spmv +DEBUG: end coo::advanced_spmv +free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin freeDEBUG: end advanced_apply() +DEBUG: end apply() +DEBUG: begin free +DEBUG: end freeDEBUG: end apply() +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end +DEBUG: begin free +DEBUG: end free + +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freefree +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +free +DEBUG: end free +free +DEBUG: end free +DEBUG: end free +DEBUG: begin csr-csr +DEBUG: begin free +DEBUG: end free +DEBUG: begin csr-csr +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocateDEBUG: begin csr-csr +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate + +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end DEBUG: end allocate +DEBUG: begin allocate +allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end DEBUG: end components::fill_array +DEBUG: begin components::fill_arraycomponents::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end 
partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin freeDEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +free + +DEBUG: end free +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array + +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end components::fill_array +DEBUG: begin DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copyallocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy + +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy()DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array + +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy() +DEBUG: begin allocateDEBUG: begin copy() +DEBUG: begin DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +dense::fill +DEBUG: begin allocate +DEBUG: end DEBUG: begin copy +DEBUG: end DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +allocate +DEBUG: begin allocate +DEBUG: end 
allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy() +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soaDEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa + +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate + +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocateallocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate + +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocateDEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate + +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin freeDEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate + +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate + +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocateDEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin 
allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end freefree +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free + +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateallocate + +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocateDEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocatefree +DEBUG: end free + +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocateDEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin +DEBUG: begin allocate +allocate +DEBUG: end allocate +DEBUG: begin DEBUG: end allocate +free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateallocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocateallocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin +DEBUG: begin allocate +DEBUG: end allocateallocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin 
allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateallocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate + +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateDEBUG: end allocate +DEBUG: begin free + +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin freeallocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free + +DEBUG: end free +DEBUG: begin free +DEBUG: end freefree +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freefree +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free + +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy + +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end free +DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: begin allocatedistributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: end copy +free +DEBUG: end free +DEBUG: begin free + +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrsDEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy + +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free 
+DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end DEBUG: end copy +free +DEBUG: begin copy +DEBUG: end copy +free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin freeDEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: end free +DEBUG: begin free +free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +free +DEBUG: end free +DEBUG: begin DEBUG: begin free +DEBUG: end free +DEBUG: end free +free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freefree +DEBUG: end free +DEBUG: begin +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy() +free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: begin apply() +DEBUG: begin allocatedense::copy +DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: begin apply() +DEBUG: begin allocate + +DEBUG: end allocate +DEBUG: begin DEBUG: end allocate +DEBUG: begin allocate +copy() +DEBUG: begin apply() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocateallocate +DEBUG: end allocate +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: end allocate +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather + +DEBUG: begin apply() +DEBUG: begin apply() +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: begin csr::spmv +DEBUG: begin apply() +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end csr::spmv +DEBUG: end DEBUG: end apply() +DEBUG: begin DEBUG: end apply() +DEBUG: begin advanced_apply() +apply() +DEBUG: begin 
advanced_apply()advanced_apply() +DEBUG: begin csr::advanced_spmv + +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply() +DEBUG: end csr::advanced_spmv +DEBUG: end apply() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end advanced_apply() +DEBUG: end apply() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2dense::compute_squared_norm2 +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv + +DEBUG: end advanced_apply() +DEBUG: end apply() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin allocate +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin dense::add_scaled +dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: begin DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin DEBUG: begin dense::compute_sqrt +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy()dense::compute_sqrt +DEBUG: begin copy() +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy + +DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: end copy() +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocateDEBUG: end copy() +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: begin free +DEBUG: end freeDEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy + +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: end copy() +free +DEBUG: end free +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +free +DEBUG: end free +DEBUG: begin freeDEBUG: begin copy() +DEBUG: begin allocate +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end free +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy 
+DEBUG: end copy() +DEBUG: begin apply() + +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: begin apply() +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply() +DEBUG: begin csr::spmvDEBUG: begin apply() +DEBUG: begin dense::row_gather +DEBUG: end DEBUG: begin apply() +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end dense::row_gather +DEBUG: begin apply() +apply() +DEBUG: begin advanced_apply() + +DEBUG: end csr::spmv +DEBUG: end apply() +DEBUG: begin advanced_apply() +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: begin csr::spmv +DEBUG: end csr::spmvDEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply() +DEBUG: end apply() + +DEBUG: end apply() +DEBUG: begin advanced_apply() +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply() +DEBUG: end apply() +DEBUG: end advanced_apply() +DEBUG: end apply() +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free + +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: end freefree +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin freefree +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end free +DEBUG: begin free +DEBUG: end freefree +DEBUG: begin free + +DEBUG: end csr-csr +DEBUG: begin free + +DEBUG: begin free +DEBUG: end free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: end csr-csr +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end csr-csr +Current state:free +DEBUG: end free +DEBUG: end stencil(100,7pt,stencil) +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end stencil(100,7pt,stencil) +[ + + { + "size": 81, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 6420, + "max_relative_norm2": 0.0, + "time": 0.000037, + "repetitions": 1, + "completed": true + } + }, + "nnz": 144, + "optimal": {} + } +] +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end stencil(100,7pt,stencil) diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout b/benchmark/test/reference/spmv_distributed.profile.stdout index 2aeeeb5b0d5..5512866fdf0 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stdout +++ b/benchmark/test/reference/spmv_distributed.profile.stdout @@ -6,14 +6,14 @@ "comm_pattern": "stencil", "spmv": { "csr-csr": { - "storage": 2316, + 
"storage": 6420, "max_relative_norm2": 1.0, "time": 1.0, "repetitions": 1, "completed": true } }, - "nnz": 135, + "nnz": 144, "optimal": { "spmv": "csr-csr" } diff --git a/benchmark/test/reference/spmv_distributed.simple.stderr b/benchmark/test/reference/spmv_distributed.simple.stderr index 57f31d44686..7fa9aeb581f 100644 --- a/benchmark/test/reference/spmv_distributed.simple.stderr +++ b/benchmark/test/reference/spmv_distributed.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) +This is Ginkgo 1.6.0 (develop) + running with core module 1.6.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 @@ -21,14 +21,14 @@ Current state: "comm_pattern": "stencil", "spmv": { "csr-csr": { - "storage": 2316, + "storage": 6420, "max_relative_norm2": 1.0, "time": 1.0, "repetitions": 10, "completed": true } }, - "nnz": 135, + "nnz": 144, "optimal": {} } ] diff --git a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout index d8cd32ba834..7b6e0883c14 100644 --- a/benchmark/test/reference/spmv_distributed.simple.stdout +++ b/benchmark/test/reference/spmv_distributed.simple.stdout @@ -6,14 +6,14 @@ "comm_pattern": "stencil", "spmv": { "csr-csr": { - "storage": 2316, + "storage": 6420, "max_relative_norm2": 1.0, "time": 1.0, "repetitions": 10, "completed": true } }, - "nnz": 135, + "nnz": 144, "optimal": { "spmv": "csr-csr" } From 19a4402a27a558a2dac6eb4ae3895811477e8161 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 22 May 2023 13:56:37 +0200 Subject: [PATCH 035/583] sanitize more output --- benchmark/blas/distributed/multi_vector.cpp | 9 +- .../multi_vector_distributed.profile.stderr | 680 +++---- .../reference/spmv_distributed.profile.stderr | 1794 ++++++++--------- 3 files changed, 1243 insertions(+), 1240 deletions(-) diff --git a/benchmark/blas/distributed/multi_vector.cpp b/benchmark/blas/distributed/multi_vector.cpp index 4d3b821ed2e..be326b08b96 100644 --- a/benchmark/blas/distributed/multi_vector.cpp +++ b/benchmark/blas/distributed/multi_vector.cpp @@ -63,12 +63,15 @@ Parameters for a benchmark case are: std::string format = example_config; initialize_argument_parsing(&argc, &argv, header, format); - std::string extra_information = "The operations are " + FLAGS_operations; - print_general_information(extra_information); - const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); const auto rank = comm.rank(); + if (rank == 0) { + std::string extra_information = + "The operations are " + FLAGS_operations; + print_general_information(extra_information); + } + auto exec = executor_factory_mpi.at(FLAGS_executor)(comm.get()); std::string json_input = broadcast_json_input(get_input_stream(), comm); diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr index 3e650323bfa..3cf18472311 100644 --- a/benchmark/test/reference/multi_vector_distributed.profile.stderr +++ b/benchmark/test/reference/multi_vector_distributed.profile.stderr @@ -5,517 +5,517 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The operations are copy,axpy,scalRunning test case { - "n": DEBUG: begin n = 100 + "n": 100, + "blasDEBUG: begin n = 100 DEBUG: begin copy -100, - "blaDEBUG: begin allocate -DEBUG: end s": {} +": {} } DEBUG: begin n = 100 +DEBUG: begin copy DEBUG: begin 
allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -copy -DEBUG: begin allocateDEBUG: begin allocate DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_sizeDEBUG: begin n = 100 +DEBUG: begin copy +DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguousDEBUG: end allocate +DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin partition::build_starting_indicesDEBUG: begin n = 100 -DEBUG: begin copy DEBUG: begin allocate DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin allocate +DEBUG: end allocateDEBUG: end allocate +DEBUG: begin allocate DEBUG: end allocate - DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array +allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin DEBUG: begin components::fill_array -DEBUG: end components::fill_arrayallocate -DEBUG: end allocate -allocate -DEBUG: end allocate DEBUG: begin components::fill_array - +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin partition::build_from_contiguous -DEBUG: end DEBUG: begin free -DEBUG: end free +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copyDEBUG: end components::fill_array +DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin free -DEBUG: end free +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copyDEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array -partition::build_from_contiguous +DEBUG: end components::fill_array 
+DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy +DEBUG: end copy DEBUG: begin free -DEBUG: end free + +DEBUG: end copy DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin allocate -DEBUG: end DEBUG: end DEBUG: begin copy +DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocatecomponents::fill_array -allocate -DEBUG: begin +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate DEBUG: end allocate + +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end DEBUG: begin free +DEBUG: end freefree +DEBUG: end free +DEBUG: begin free +DEBUG: end free free +DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin freeDEBUG: begin allocate -DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: begin free +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin allocate -components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array - +DEBUG: end allocate +DEBUG: begin free DEBUG: end free -DEBUG: begin copy -DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin freeDEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate +partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free +allocate DEBUG: begin allocate DEBUG: end allocateDEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end free -DEBUG: end free -DEBUG: begin -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate -components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous 
-DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indicescopy -DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free +DEBUG: begin components::fill_array +DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: begin copy -DEBUG: end copyDEBUG: begin free -DEBUG: end free -DEBUG: begin DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_arrayDEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free - +DEBUG: begin components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin components::fill_arrayallocate -DEBUG: end allocate -DEBUG: begin DEBUG: begin dense::fill -DEBUG: end +DEBUG: begin components::fill_array +components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end partition::build_starting_indices +DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy DEBUG: begin free DEBUG: end free DEBUG: begin copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate -free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin freecopy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::copy -DEBUG: end dense::copy -dense::fill -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: end free + DEBUG: begin free DEBUG: end free DEBUG: begin free +DEBUG: begin dense::fill +DEBUG: end dense::fill +free DEBUG: end free DEBUG: begin dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill DEBUG: end dense::fill DEBUG: begin dense::copy DEBUG: end dense::copy +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: begin dense::copy +DEBUG: end 
dense::copy DEBUG: begin free +DEBUG: end DEBUG: begin free DEBUG: end free +free DEBUG: begin free -DEBUG: end freeDEBUG: begin free -DEBUG: end free -DEBUG: begin free +DEBUG: end DEBUG: begin free DEBUG: end free -DEBUG: end copy DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end copy -DEBUG: begin axpy -DEBUG: begin allocate -DEBUG: end copy Current state: [ - DEBUG: begin axpy + free +DEBUG: end copy +DEBUG: begin axpy DEBUG: begin allocate DEBUG: end allocate +DEBUG: end copy +DEBUG: begin axpy DEBUG: begin allocate -DEBUG: end DEBUG: end allocate -DEBUG: begin allocate +DEBUG: begin allocate { + "n": DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin { - "n": 10allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate +DEBUG: end DEBUG: end allocate -DEBUG: begin allocate -partition::build_ranges_from_global_size +DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate +allocate DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin 0, - DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocate -allocate -DEBUG: end allocate +100, + "blas": { + allocateDEBUG: end allocate DEBUG: begin allocate -DEBUG: end "blas": { - DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array components::fill_array +DEBUG: begin "copy": { + +DEBUG: begin allocate +components::fill_array DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end -DEBUG: begin components::fill_array +DEBUG: begin DEBUG: end allocate +DEBUG: begin allocate DEBUG: end components::fill_array -DEBUG: begin components::fill_array - "copy": { - components::fill_array -DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indicesDEBUG: end components::fill_array +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free "ti -DEBUG: begin copy -DEBUG: end copy + "time": 0.0000components::fill_array +DEBUG: end components::fill_arrayDEBUG: end copy DEBUG: begin free - +08, + +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocateme": 0.000013, - DEBUG: end free -DEBUG: begin copy -DEBUG: end copy DEBUG: begin allocate -DEBUG: end DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin free -DEBUG: end allocate -DEBUG: begin free -free -DEBUG: begin free -DEBUG: end free - "flopDEBUG: end free + "flops": 12DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin free 
-DEBUG: end freeDEBUG: begin free +DEBUG: begin 500000.0, + partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin freefree DEBUG: end free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin s": 7692307.692307693, - allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free + DEBUG: end free - "DEBUG: begin allocate -DEBUG: end allocate - +DEBUG: begin copy +DEBUG: end DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin bandwidth":DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array 1230allocate -DEBUG: end allocate -DEBUG: begin allocate - -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end 76923.076923DEBUG: end allocate +DEBUG: begin "bandwidth": 200000000.0, + copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +allocate +DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices09, - components::fill_array -DEBUG: begin components::fill_array -DEBUG: end -DEBUG: begin copy -DEBUG: end copy -components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array -DEBUG: end DEBUG: begin free +DEBUG: end components::fill_array "repetitiDEBUG: begin free DEBUG: end free -DEBUG: begin copy -DEBUG: end copy - "rcomponents::fill_array -DEBUG: begin partition::build_from_contiguous +DEBUG: begin free +DEBUG: end free + +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguousons": 1, + DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size + DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy -DEBUG: end copyDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end epetitions": 1, - "completed": t +DEBUG: end copy DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate + "completed": tDEBUG: begin allocate DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +rue + DEBUG: end allocate +DEBUG: begin allocateDEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin freedense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -rue - +DEBUG: begin } + +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_arrayfree DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: 
begin free -DEBUG: end DEBUG: begin dense::add_scaled -DEBUG: end dense::add_scaled - } - free +DEBUG: end free +DEBUG: begin } + } +] +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin +DEBUG: begin axpy +DEBUG: begin allocatecomponents::fill_array +DEBUG: end components::fill_array DEBUG: begin dense::fill DEBUG: end dense::fill - } - DEBUG: begin dense::fill +DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill - } -] -DEBUG: begin axpy -DEBUG: begin allocate + DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate +DEBUG: end partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copypartition::build_ranges_from_global_size DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin DEBUG: begin dense::add_scaled +DEBUG: end +DEBUG: end copyDEBUG: begin dense::add_scaled DEBUG: end dense::add_scaled allocate +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin +DEBUG: begin allocate DEBUG: end components::fill_array -DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array +DEBUG: begin components::fill_arrayallocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end +DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy +free +DEBUG: end DEBUG: begin free +DEBUG: end free DEBUG: begin free DEBUG: end free +partition::build_starting_indices +DEBUG: begin copy +DEBUG: end DEBUG: begin dense::fill +DEBUG: end dense::fill +copy +DEBUG: begin free +DEBUG: end DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +free DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin free +DEBUG: begin DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +free DEBUG: end free DEBUG: begin free DEBUG: end free @@ -573,137 +573,137 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin free +DEBUG: begin DEBUG: begin free +DEBUG: end freeDEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free -DEBUG: end free + DEBUG: begin free DEBUG: end free -DEBUG: end axpy -DEBUG: begin DEBUG: begin free -DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin free DEBUG: end free DEBUG: end axpy DEBUG: begin scal -scal -DEBUG: begin allocate DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin DEBUG: end free DEBUG: end axpy -Current state: -[ - { - DEBUG: end allocate +DEBUG: begin scal DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin DEBUG: begin allocate +DEBUG: 
begin allocate +DEBUG: end free +DEBUG: end axpy +allocate DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_sizeallocate DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_sizeCurrent state: +[ + { + DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate - "n": 100, - allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end DEBUG: end allocate -DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate +DEBUG: end "n"allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocate "blasallocate +DEBUG: end allocateallocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array: 10 DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array - -DEBUG: begin components::fill_array +DEBUG: end DEBUG: end components::fill_array DEBUG: begin components::fill_array -DEBUG: end ": { - DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_arraycomponents::fill_array +DEBUG: end components::fill_array +0, + components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array - "co DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices -DEBUG: end DEBUG: begin components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices -DEBUG: end py": {partition::build_starting_indices +DEBUG: end partition::build_starting_indices DEBUG: begin copy -DEBUG: end copy -DEBUG: begin partition::build_starting_indices +DEBUG: end copy "blas": { + DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy - free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end DEBUG: begin free DEBUG: end free -DEBUG: begin copy +DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate - "tiallocate DEBUG: begin free DEBUG: end free + "copy": { DEBUG: end allocate DEBUG: begin free DEBUG: end free -me": 0.000013, - DEBUG: begin free -DEBUG: end free DEBUG: begin free +DEBUG: end DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin freefree +DEBUG: begin free + DEBUG: end free DEBUG: begin free -DEBUG: end freeDEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end -DEBUG: begin dense::fill + DEBUG: end free +DEBUG: begin free DEBUG: end free DEBUG: begin dense::fill DEBUG: end dense::fill - "flops":DEBUG: begin dense::fill +DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::scale -DEBUG: end dense::scale - 7692307.692307693, - dense::fill +DEBUG: end dense::scalefree DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill + "ti DEBUG: begin dense::scale DEBUG: end dense::scale - "bandwidth": 123076923.07692309, +me": 0.000008, + "flops": 12500000.0, + "bandwidth": 200000000.0, "repetitions": 1, "completed": true }, "axpy": { - "time": 0.000017, - "flops": 11764705.88235294, - "bandwidth": 141176470.5882353, + 
"time": 0.00002, + "flops": 10000000.0, + "bandwidth": 119999999.99999999, "repetitions": 1, "completed": true } @@ -759,46 +759,46 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::scale DEBUG: end dense::scale -DEBUG: begin freeDEBUG: begin free - -DEBUG: end free DEBUG: begin free +DEBUG: end freeDEBUG: begin free DEBUG: end free -DEBUG: end DEBUG: begin free +DEBUG: begin freeDEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end DEBUG: end free +DEBUG: end scal +DEBUG: end n = 100 + DEBUG: begin free DEBUG: end free DEBUG: end scal -DEBUG: end n = 100 scal Current state: [ { - "n":scal + "n": 100, + +DEBUG: end free +DEBUG: end scal DEBUG: end n = 100 - - 100, - "blas": { + "blas": { "copy": { - "time": 0.000013, - "flops": 7692307.692307693, - "bandwidth": 123076923.07692309, + "time": 0.000008, + "flops": 12500000.0, + "bandwidth": 200000000.0, "repetitions": 1, "completed": true }, "axpy": { - "time": 0.000017, - "flops": 11764705.88235294, - "bandwidth": 141176470.5882353, + "time": 0.00002, + "flops": 10000000.0, + "bandwidth": 119999999.99999999, "repetitions": 1, "completed": true }, "scal": { - "time": 0.000007, - "flops": 14285714.285714285, - "bandwidth": 228571428.57142857, + "time": 0.000006, + "flops": 16666666.666666666, + "bandwidth": 266666666.66666666, "repetitions": 1, "completed": true } diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr index 1ce62b48dc2..b190ac8a458 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stderr +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -5,227 +5,270 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are [csr]x[csr] The number of right hand sides is 1 -DEBUG: begin stencil(100,7pt,stencil) -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin stencil(100,7pt,stencil) +Running test case +{ + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": {} +} +DEBUG: begin stencil(100,7pt,stencil)DEBUG: begin stencil(100,7pt,stencil) + DEBUG: begin allocate DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_sizeDEBUG: begin allocate +DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocateDEBUG: begin allocate +DEBUG: begin stencil(100,7pt,stencil) + +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate - DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end allocate DEBUG: begin allocate -DEBUG: end Running test case -{ - DEBUG: end allocate -DEBUG: begin components::fill_array -allocate -DEBUG: begin components::fill_array - "size":DEBUG: end components::fill_array +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin components::fill_array 100, - DEBUG: begin components::fill_array +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin 
components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array +DEBUG: begin components::fill_array DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocateDEBUG: end components::fill_array +DEBUG: begin components::fill_array DEBUG: end components::fill_array + +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin "stencil": DEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -partition::build_from_contiguous +DEBUG: end partition::build_starting_indicesDEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy -DEBUG: end DEBUG: begin copy DEBUG: end copy -"7pt", - "comm_copy DEBUG: begin free -DEBUG: end free +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array + DEBUG: begin copy DEBUG: end copy DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin DEBUG: begin allocate DEBUG: begin allocate -DEBUG: end DEBUG: begin allocate DEBUG: end allocate -pattern": "stallocate +DEBUG: begin allocate +DEBUG: end allocatepartition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices + +DEBUG: begin allocate +DEBUG: end DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocateDEBUG: begin allocate +allocate +DEBUG: begin components::aos_to_soa +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end freeDEBUG: begin allocate DEBUG: end allocate +DEBUG: begin DEBUG: end components::aos_to_soa + +DEBUG: begin copy +DEBUG: end copycomponents::aos_to_soa DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin encil", - "spmv": {} -} - +DEBUG: begin dense::fill +DEBUG: end +DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa +DEBUG: begin allocate DEBUG: end components::aos_to_soa +dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data DEBUG: begin allocate DEBUG: end allocate -dense::fill +DEBUG: end allocate +DEBUG: begin components::aos_to_soa DEBUG: begin dense::fill -DEBUG: begin stencil(100,7pt,stencil) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end freeDEBUG: end allocate DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data -DEBUG: end dense::fill_in_matrix_data + +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data DEBUG: begin free +DEBUG: end dense::fill_in_matrix_data DEBUG: end free DEBUG: begin free -DEBUG: end DEBUG: 
begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin freeDEBUG: begin free +DEBUG: end free DEBUG: end free DEBUG: begin free +DEBUG: end free + DEBUG: end free DEBUG: begin free -DEBUG: end freeDEBUG: begin allocate -DEBUG: end allocate -free DEBUG: begin free DEBUG: end free -DEBUG: begin partition::build_ranges_from_global_size - DEBUG: begin free +DEBUG: end DEBUG: end free +free DEBUG: begin free DEBUG: end free DEBUG: begin free -DEBUG: end freeDEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end partition::build_ranges_from_global_size - +DEBUG: begin allocate +DEBUG: end DEBUG: begin free +DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin allocate +DEBUG: begin partition::build_ranges_from_global_size DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_sizeDEBUG: begin allocate DEBUG: end allocate +partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin allocateDEBUG: begin DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocateDEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array - DEBUG: end allocate DEBUG: begin -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate +DEBUG: begin allocatefree +DEBUG: end free +allocate DEBUG: end allocate DEBUG: begin allocate + DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array -DEBUG: end components::fill_array +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocateDEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin components::fill_array -DEBUG: end components::fill_array +DEBUG: end components::fill_arrayDEBUG: end components::fill_array DEBUG: begin components::fill_array +DEBUG: end +DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate + +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin DEBUG: begin allocate +components::fill_array +DEBUG: begin components::fill_array DEBUG: end components::fill_array +DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguouscomponents::fill_array DEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array - +DEBUG: begin allocate +DEBUG: end allocateDEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copyDEBUG: end copy 
-DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copyDEBUG: begin components::fill_array -DEBUG: end components::fill_array + DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy +DEBUG: end copy DEBUG: begin free DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end -DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocate -partition::build_starting_indices +DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy DEBUG: begin free DEBUG: end free DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate @@ -233,430 +276,341 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin components::aos_to_soa -DEBUG: end -DEBUG: end copy -DEBUG: begin DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::fillcomponents::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -allocate -DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin -DEBUG: end dense::fill -DEBUG: begin dense::fill_in_matrix_data -DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free DEBUG: begin dense::fill -allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end DEBUG: end allocate +DEBUG: begin allocate DEBUG: end allocate -DEBUG: end free -DEBUG: begin free -DEBUG: end DEBUG: end dense::fill -DEBUG: begin dense::fill_in_matrix_data -DEBUG: end dense::fill_in_matrix_dataDEBUG: begin components::aos_to_soa +DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin dense::fillfree +DEBUG: begin dense::fill +DEBUG: end dense::fillDEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data DEBUG: begin free DEBUG: end free DEBUG: begin free -DEBUG: end free - +copy DEBUG: begin free DEBUG: end free - -DEBUG: end dense::fill +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data DEBUG: begin free DEBUG: end free DEBUG: begin free -DEBUG: end free +allocate +DEBUG: end allocate +DEBUG: begin DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin DEBUG: begin DEBUG: begin free +DEBUG: begin free +DEBUG: end freeallocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: begin DEBUG: begin free DEBUG: end free DEBUG: begin free +DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +free DEBUG: end free DEBUG: begin free DEBUG: end free free -freeDEBUG: begin 
copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: end free +Matrix is of size (81, 81) +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free - +DEBUG: begin DEBUG: begin copy() +DEBUG: begin copy() +free DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: begin allocate +DEBUG: end allocate DEBUG: begin allocate +DEBUG: end allocate + +DEBUG: begin free +DEBUG: end free DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end DEBUG: begin copy() -copy() -DEBUG: begin allocate +DEBUG: end copy() +DEBUG: begin allocateDEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: begin free +DEBUG: end free + DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size +DEBUG: end copy() DEBUG: begin allocate -DEBUG: end DEBUG: begin allocate -DEBUG: end allocate DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size +DEBUG: begin partition::build_ranges_from_global_sizeDEBUG: begin copy() DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate + +DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocateallocate +DEBUG: begin DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copyDEBUG: begin allocate +DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate +DEBUG: end allocate DEBUG: begin allocate -DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: end allocate +allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin components::fill_array +DEBUG: begin DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array - -DEBUG: end copy() +DEBUG: begin components::fill_arrayDEBUG: end copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin DEBUG: end components::fill_array +components::fill_array +DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array + DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguousDEBUG: begin components::fill_array +DEBUG: begin components::fill_arrayDEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocateDEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguousallocate + DEBUG: end allocate DEBUG: begin allocate -DEBUG: end partition::build_from_contiguous +DEBUG: end allocate +DEBUG: begin allocatepartition::build_from_contiguous DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy +DEBUG: begin free +DEBUG: end DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin -DEBUG: end allocate -DEBUG: begin 
allocate + DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free - DEBUG: begin components::fill_array +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free DEBUG: end components::fill_array -DEBUG: begin DEBUG: begin copy() -components::fill_array +DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin components::fill_arraycopy +DEBUG: begin free +DEBUG: begin copy() +DEBUG: begin copycomponents::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin DEBUG: begin copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end free +partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end DEBUG: begin copy DEBUG: end copy +partition::build_starting_indices DEBUG: begin copy DEBUG: end copy - -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -components::aos_to_soa -DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: end dense::fill +DEBUG: begin freeDEBUG: end copy() +DEBUG: begin copy()DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy -DEBUG: end copy() -DEBUG: begin copy()DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end DEBUG: begin dense::fill_in_matrix_data -DEBUG: end dense::fill_in_matrix_data +DEBUG: end free DEBUG: begin copy DEBUG: end copy -free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin DEBUG: begin copy +DEBUG: begin copy + +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy() DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: end copy() +DEBUG: end copy()DEBUG: end copy()DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: end dense::fill + DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy DEBUG: begin copy -free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::aos_to_soa DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: end copy() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin DEBUG: end components::aos_to_soa +DEBUG: begin dense::fill +DEBUG: end dense::fill DEBUG: begin allocate -DEBUG: begin copy() -DEBUG: begin copy -DEBUG: end copyfree -DEBUG: end free -Matrix is of size (81DEBUG: end allocate -DEBUG: begin distributed_matrix::build_local_nonlocal - +DEBUG: end DEBUG: begin copy DEBUG: end copy DEBUG: begin copy -DEBUG: end copy, 81) -DEBUG: begin allocate -DEBUG: end allocate +DEBUG: end copy() +DEBUG: begin copy() +allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate - +DEBUG: begin copy +DEBUG: end copyDEBUG: end allocate +DEBUG: begin components::aos_to_soaDEBUG: end copy DEBUG: end copy() DEBUG: begin allocate -DEBUG: end DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate DEBUG: end allocate -DEBUG: 
begin free -allocate DEBUG: begin dense::fill DEBUG: end dense::fill -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: begin allocate -DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy + +DEBUG: end components::aos_to_soa DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end dense::copy DEBUG: end copy() DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_sizeDEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa -DEBUG: begin -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate + +DEBUG: begin distributed_matrix::build_local_nonlocal DEBUG: end allocate DEBUG: begin allocate -free -DEBUG: end free +DEBUG: end allocateDEBUG: begin dense::fill +DEBUG: end dense::fill + DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end allocate -DEBUG: end allocate -DEBUG: begin distributed_matrix::build_local_nonlocal -DEBUG: end allocate -DEBUG: begin allocate -free +DEBUG: begin components::aos_to_soa DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin allocateDEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_arrayDEBUG: begin free -DEBUG: end free DEBUG: begin allocate -DEBUG: end allocate +DEBUG: end DEBUG: end components::aos_to_soa +allocate DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate + DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -free -DEBUG: end free -DEBUG: begin allocatecomponents::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indicesDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free +DEBUG: end allocateDEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: end allocate DEBUG: begin free DEBUG: end free +DEBUG: begin distributed_matrix::build_local_nonlocal + +DEBUG: begin components::aos_to_soa DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end -DEBUG: begin free -DEBUG: end free DEBUG: begin allocate -DEBUG: end allocate -free +DEBUG: end DEBUG: end components::aos_to_soa DEBUG: begin allocate DEBUG: end allocate +allocate DEBUG: begin free -DEBUG: end free - -DEBUG: end free -DEBUG: begin copy() -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: end copy() +DEBUG: begin distributed_matrix::build_local_nonlocal DEBUG: begin free DEBUG: end free DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate 
-DEBUG: end allocate +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate +DEBUG: begin free +DEBUG: end DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin allocateDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -DEBUG: end free +free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -DEBUG: end free +DEBUG: end DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free +DEBUG: begin freefree +DEBUG: begin allocate DEBUG: end free -DEBUG: begin copy() -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: end copy() +DEBUG: begin allocateDEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate @@ -669,146 +623,119 @@ DEBUG: begin free DEBUG: end free DEBUG: begin allocate -DEBUG: begin allocate -DEBUG: end DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -DEBUG: end freeDEBUG: end allocate -DEBUG: begin free DEBUG: end free -allocate -DEBUG: begin allocate +DEBUG: begin allocateDEBUG: end allocate +DEBUG: begin free DEBUG: end allocate -DEBUG: begin allocate +DEBUG: begin free DEBUG: end allocate -DEBUG: begin components::aos_to_soa - DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free DEBUG: end free DEBUG: begin allocate -DEBUG: end allocateDEBUG: end components::aos_to_soa -DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin distributed_matrix::build_local_nonlocal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocateDEBUG: begin allocate -DEBUG: end allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -DEBUG: end freeDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin -DEBUG: end allocate -DEBUG: begin allocate +DEBUG: end DEBUG: end allocate +DEBUG: begin freeDEBUG: end free DEBUG: begin allocate DEBUG: end allocate free -DEBUG: end free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: begin free DEBUG: end free + DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate -DEBUG: end +DEBUG: begin allocate +DEBUG: end allocateDEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate + DEBUG: begin free DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate +DEBUG: begin DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate +DEBUG: begin freeallocate +DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate -allocate -DEBUG: end allocate +DEBUG: begin free +DEBUG: end DEBUG: end allocate +DEBUG: begin free +DEBUG: end free DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin DEBUG: end allocate 
+DEBUG: end free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin allocate + DEBUG: end allocate +DEBUG: begin freefree +DEBUG: begin allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free +DEBUG: end free + DEBUG: end free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocateallocate DEBUG: end allocate -DEBUG: begin allocate +DEBUG: begin free +DEBUG: end freeDEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate + DEBUG: begin allocate -DEBUG: end DEBUG: begin allocate -DEBUG: end allocate +DEBUG: end allocateDEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate +DEBUG: begin freeDEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocate + +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocateallocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocateDEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate + DEBUG: begin free DEBUG: end free DEBUG: begin allocate @@ -817,180 +744,253 @@ DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: end allocate +DEBUG: begin free +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end DEBUG: begin free DEBUG: end free +DEBUG: begin DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate +allocate +DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free +DEBUG: begin DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free +DEBUG: end freeDEBUG: begin free DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free - DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocatefree -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free + +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin freeallocate +DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocateDEBUG: end free +DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -free +DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free - +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin DEBUG: begin free +DEBUG: begin DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free DEBUG: end free -DEBUG: begin freeDEBUG: begin free +DEBUG: begin allocate DEBUG: end free DEBUG: begin free -allocate +DEBUG: end free +DEBUG: begin free +DEBUG: end freeallocate + 
DEBUG: end allocate DEBUG: begin free DEBUG: end free +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate - -DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin free -DEBUG: end DEBUG: end free +DEBUG: begin allocate + DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin allocatefree DEBUG: begin free +DEBUG: end freeDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin DEBUG: end allocate DEBUG: begin free DEBUG: end free - -DEBUG: end allocate +DEBUG: begin DEBUG: begin free DEBUG: end free +allocate +DEBUG: end allocate DEBUG: begin allocate +DEBUG: end allocateallocate DEBUG: end allocate -DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free -DEBUG: end freeDEBUG: begin free DEBUG: end free DEBUG: end distributed_matrix::build_local_nonlocal DEBUG: begin copy DEBUG: end copy -DEBUG: begin copy + DEBUG: begin allocate -DEBUG: begin free -DEBUG: end free +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocateDEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocateDEBUG: begin copy DEBUG: end copy +allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocateDEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate +DEBUG: end allocate +free +DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: end allocate DEBUG: begin free DEBUG: end free +DEBUG: begin DEBUG: begin free +DEBUG: end allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end distributed_matrix::build_local_nonlocal -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: end copy +DEBUG: begin free DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocate +DEBUG: end free +DEBUG: begin freeallocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free + DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate +DEBUG: begin DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +allocate DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin freeDEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate +DEBUG: end 
free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin +DEBUG: begin copyfree +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free + +DEBUG: end copy +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -1031,130 +1031,123 @@ DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -DEBUG: begin freeDEBUG: end free -DEBUG: begin freeDEBUG: begin free DEBUG: end free +DEBUG: begin free +DEBUG: end freeDEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free - +DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free - DEBUG: begin free -DEBUG: end free +DEBUG: end DEBUG: begin free DEBUG: end free +free DEBUG: begin free DEBUG: end free -DEBUG: begin freeDEBUG: begin free -DEBUG: end free DEBUG: begin free +DEBUG: end freeDEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin DEBUG: end free +DEBUG: begin free + DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: begin freeDEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin +DEBUG: begin freeDEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free +DEBUG: end free + DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin apply() -free -DEBUG: end free DEBUG: begin apply() -free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocateDEBUG: begin apply() +DEBUG: end free +DEBUG: begin apply() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin allocate -DEBUG: end DEBUG: begin dense::row_gather DEBUG: end allocate DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -allocate +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::row_gather +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather DEBUG: end dense::row_gather DEBUG: begin dense::row_gather -DEBUG: end DEBUG: end dense::row_gather -dense::row_gather -DEBUG: begin DEBUG: begin apply() -apply() -DEBUG: begin coo::spmv -DEBUG: end coo::spmv +DEBUG: end dense::row_gather +DEBUG: begin apply() DEBUG: begin apply() +DEBUG: begin apply() +DEBUG: begin coo::spmv DEBUG: begin coo::spmv -DEBUG: end coo::spmvDEBUG: begin coo::spmv +DEBUG: end coo::spmv +DEBUG: end coo::spmv DEBUG: end DEBUG: end apply() - +DEBUG: begin coo::spmv DEBUG: end apply() -DEBUG: begin advanced_apply() coo::spmv +DEBUG: end apply() DEBUG: begin advanced_apply() DEBUG: begin coo::advanced_spmv -DEBUG: end coo::advanced_spmv +DEBUG: end DEBUG: begin advanced_apply() DEBUG: begin coo::advanced_spmv DEBUG: end coo::advanced_spmv -DEBUG: end advanced_apply()DEBUG: end apply() +DEBUG: end DEBUG: begin advanced_apply() +DEBUG: begin coo::advanced_spmv +coo::advanced_spmv DEBUG: end advanced_apply() +advanced_apply() DEBUG: end apply() DEBUG: begin free DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin -DEBUG: begin advanced_apply() -DEBUG: begin coo::advanced_spmv DEBUG: end coo::advanced_spmv -free -DEBUG: end free +DEBUG: end advanced_apply() +DEBUG: end apply()DEBUG: end apply() DEBUG: begin free DEBUG: end free -DEBUG: begin freeDEBUG: end advanced_apply() -DEBUG: end apply() DEBUG: begin free -DEBUG: end freeDEBUG: 
end apply() +DEBUG: end DEBUG: begin free +DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free -DEBUG: end DEBUG: begin free -DEBUG: end free - DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free -DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free @@ -1166,24 +1159,19 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end freefree -DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin DEBUG: begin free -DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: begin DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -1191,63 +1179,76 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free -free -DEBUG: end free -free DEBUG: end free +DEBUG: begin free DEBUG: end free -DEBUG: begin csr-csr DEBUG: begin free DEBUG: end free +DEBUG: begin free DEBUG: begin csr-csr DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size + +DEBUG: end free +DEBUG: begin free +DEBUG: end +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocateDEBUG: begin csr-csr +DEBUG: begin free +DEBUG: begin free +DEBUG: end free +DEBUG: begin csr-csrfree +DEBUG: end free +DEBUG: begin csr-csr DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_sizeallocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +partition::build_ranges_from_global_size DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate - +DEBUG: end DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end DEBUG: end allocate +DEBUG: end allocate DEBUG: begin allocate -allocate -DEBUG: begin components::fill_array +DEBUG: end allocateDEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array +DEBUG: end components::fill_arrayallocate +DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array + +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indicesDEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array -DEBUG: end DEBUG: end components::fill_array -DEBUG: begin components::fill_arraycomponents::fill_array -DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end 
components::fill_array @@ -1255,345 +1256,349 @@ DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices +DEBUG: begin components::fill_array + DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy DEBUG: begin free DEBUG: end free +DEBUG: end components::fill_array +DEBUG: begin components::fill_array DEBUG: begin allocate -partition::build_from_contiguous +DEBUG: end allocate +DEBUG: begin +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy DEBUG: begin free -DEBUG: end -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array +DEBUG: end DEBUG: end components::fill_array DEBUG: begin partition::build_from_contiguous DEBUG: end partition::build_from_contiguous DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy -DEBUG: end copy -DEBUG: begin freeDEBUG: end allocate -DEBUG: begin components::fill_array +DEBUG: end copycomponents::fill_array DEBUG: end components::fill_array DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array free - -DEBUG: end free -DEBUG: end components::fill_array DEBUG: begin allocate -DEBUG: end DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocateDEBUG: begin allocate + +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array +DEBUG: begin components::fill_arrayDEBUG: end components::fill_array DEBUG: begin allocate DEBUG: end allocate -allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy() +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin components::fill_array -DEBUG: end DEBUG: begin copy() +DEBUG: end components::fill_array +DEBUG: begin copy +DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: begin components::fill_array DEBUG: end components::fill_array +DEBUG: begin copy() DEBUG: begin allocate -DEBUG: end components::fill_array -DEBUG: begin DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copyallocate +DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -copy() DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: begin copy +DEBUG: end copy()DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy - +DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy -DEBUG: end copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin copy()DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy + DEBUG: begin copy -DEBUG: end copy +DEBUG: end copyDEBUG: begin copy() DEBUG: begin copy -DEBUG: end copy DEBUG: begin copy 
DEBUG: end copy DEBUG: end copy() DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy() -DEBUG: begin allocateDEBUG: begin copy() -DEBUG: begin DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: begin copy -DEBUG: end copy -DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: end -DEBUG: end allocate -DEBUG: begin components::fill_array +DEBUG: end copyDEBUG: begin components::fill_array DEBUG: end components::fill_array +DEBUG: end copy() +DEBUG: begin +DEBUG: end copy() DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: end copy() -DEBUG: begin allocate -dense::fill +DEBUG: begin copyallocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy() DEBUG: begin allocate -DEBUG: end DEBUG: begin copy -DEBUG: end DEBUG: end allocate +DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin allocate -allocate -DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate +DEBUG: end copy DEBUG: begin copy +DEBUG: end copy DEBUG: end copy() -DEBUG: end allocate +DEBUG: begin allocateDEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy + DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa + +DEBUG: end allocate +DEBUG: begin dense::fillDEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate + +DEBUG: end dense::fill DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate +DEBUG: end allocateDEBUG: end copy() +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::fill -allocate +DEBUG: end DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soaDEBUG: begin distributed_matrix::build_local_nonlocal DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin allocate DEBUG: end dense::fill DEBUG: begin allocate -DEBUG: end allocate +DEBUG: begin free +DEBUG: end DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa DEBUG: begin allocate DEBUG: end allocate +allocate +DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocatefree DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocateDEBUG: begin distributed_matrix::build_local_nonlocal DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin free +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa DEBUG: begin allocate +DEBUG: end DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate -DEBUG: end allocateDEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free +DEBUG: begin DEBUG: end free DEBUG: begin allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa - DEBUG: end allocate DEBUG: begin free +allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end 
allocate +free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate - DEBUG: begin free DEBUG: end free -DEBUG: begin DEBUG: begin free -DEBUG: end free DEBUG: begin allocate DEBUG: end allocateDEBUG: begin allocate DEBUG: end allocate -DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin DEBUG: end free DEBUG: begin allocate DEBUG: end allocate + +DEBUG: begin free +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free +allocate +DEBUG: end allocate DEBUG: begin free DEBUG: end free +DEBUG: begin allocateDEBUG: begin free +DEBUG: end free DEBUG: begin allocate -DEBUG: end allocateallocate DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate +DEBUG: end DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -DEBUG: end -DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate +allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate - -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocateDEBUG: end allocate +DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate - DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin +DEBUG: end free DEBUG: begin allocate -allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate -DEBUG: begin free +DEBUG: end allocateDEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin freeDEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: end allocate DEBUG: begin free +free DEBUG: end free -DEBUG: begin allocateDEBUG: begin allocate +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -DEBUG: end free +DEBUG: end DEBUG: end free +DEBUG: begin allocate +free DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate - +DEBUG: begin freeDEBUG: end allocate +DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin freeDEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocateDEBUG: begin free +DEBUG: begin allocate +DEBUG: end allocate DEBUG: end free DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free DEBUG: end free DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free DEBUG: end -DEBUG: end allocate DEBUG: begin free DEBUG: end free +DEBUG: begin DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end allocate +allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate +DEBUG: end allocateallocate DEBUG: end allocate DEBUG: begin free -DEBUG: end freefree +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free - +DEBUG: begin allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate -DEBUG: end DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin DEBUG: begin free +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocateDEBUG: begin allocate +DEBUG: begin DEBUG: end allocate DEBUG: begin free DEBUG: end free @@ -1601,82 
+1606,53 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin allocateallocate - DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin allocate DEBUG: end allocate +free +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocate DEBUG: begin free DEBUG: end free -DEBUG: begin allocateDEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin allocate +DEBUG: end allocateDEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free +DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate +DEBUG: end DEBUG: begin free DEBUG: end free -DEBUG: begin allocatefree -DEBUG: end free -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin allocate +DEBUG: end allocateallocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin -DEBUG: begin allocate +DEBUG: begin allocateDEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free -allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free DEBUG: end free DEBUG: begin allocate -DEBUG: end allocateDEBUG: end free -DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end DEBUG: end allocate DEBUG: begin allocate +DEBUG: end DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate @@ -1684,171 +1660,232 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin DEBUG: begin allocate allocate -DEBUG: end allocate -DEBUG: begin DEBUG: end allocate -free DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate +DEBUG: begin DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocateDEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin allocateallocate DEBUG: begin allocate +DEBUG: end allocateallocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin +DEBUG: begin free +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate + DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate -DEBUG: end DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin allocate -DEBUG: end allocateallocate DEBUG: end allocate -DEBUG: begin allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin -DEBUG: begin allocate -DEBUG: end allocateallocate DEBUG: begin allocate +DEBUG: end DEBUG: end allocate DEBUG: begin allocate +allocate DEBUG: end allocate +DEBUG: begin freeallocate DEBUG: begin free -DEBUG: end free +DEBUG: end DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin +DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin 
[diff hunks updating the DEBUG: begin/end profiler log lines (allocate, free, copy, apply, csr::spmv, csr::advanced_spmv, dense::* kernels, components::convert_idxs_to_ptrs, distributed_matrix::build_local_nonlocal) in the distributed benchmark profile reference outputs]
 Current state:
 [
     {
         "size": 81,
         "stencil": "7pt",
         "comm_pattern": "stencil",
         "spmv": {
             "csr-csr": {
                 "storage": 6420,
                 "max_relative_norm2": 0.0,
-                "time": 0.000037,
+                "time": 0.000046,
                 "repetitions": 1,
                 "completed": true
             }
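The reference logs above are generated rather than written by hand: each *.profile.stdout/*.profile.stderr pair is the captured output of a benchmark run with "-profile -profiler_hook debug", which prints a DEBUG: begin/DEBUG: end line around every allocation, copy and kernel launch. The test scripts reformatted in the following patch drive exactly these runs through test_framework.compare_output, and the same scripts can rewrite the reference files when the expected output changes. A minimal sketch, with the benchmark binary path left as a placeholder (the scripts take it as their first argument, and "--generate" switches them from comparing to regenerating, as implemented in test_framework.py.in further below):

    # compare the current benchmark output against the checked-in reference files
    python3 benchmark/test/spmv.py <path-to-spmv-benchmark-binary>
    # rewrite the reference files after an intentional change to the output
    python3 benchmark/test/spmv.py <path-to-spmv-benchmark-binary> --generate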
From e3a85538e8e03ad307cd4448661d91fc7f2dea05 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Mon, 22 May 2023 13:56:46 +0200
Subject: [PATCH 036/583] format files

---
 benchmark/test/blas.py                     |  39 +++---
 benchmark/test/conversion.py               |  61 ++++++---
 benchmark/test/matrix_statistics.py        |  27 ++--
 benchmark/test/multi_vector_distributed.py |  46 ++++---
 benchmark/test/preconditioner.py           |  41 ++++--
 benchmark/test/solver.py                   |  41 ++++--
 benchmark/test/solver_distributed.py       |  44 +++++--
 benchmark/test/sparse_blas.py              |  48 +++++--
 benchmark/test/spmv.py                     |  41 ++++--
 benchmark/test/spmv_distributed.py         |  42 +++---
 benchmark/test/test_framework.py.in        | 146 +++++++++++++++------
 11 files changed, 396 insertions(+), 180 deletions(-)

diff --git a/benchmark/test/blas.py b/benchmark/test/blas.py
index e099718bae0..160d5364e20 100755
--- a/benchmark/test/blas.py
+++ b/benchmark/test/blas.py
@@ -1,25 +1,34 @@
 #!/usr/bin/env python3
 import test_framework
+
 # check that all input modes work:
 # parameter
-test_framework.compare_output(["-input", '[{"n": 100}]'],
-                              expected_stdout="blas.simple.stdout",
-                              expected_stderr="blas.simple.stderr")
+test_framework.compare_output(
+    ["-input", '[{"n": 100}]'],
+    expected_stdout="blas.simple.stdout",
+    expected_stderr="blas.simple.stderr",
+)
 
 # stdin
-test_framework.compare_output([],
-                              expected_stdout="blas.simple.stdout",
-                              expected_stderr="blas.simple.stderr",
-                              stdin='[{"n": 100}]')
+test_framework.compare_output(
+    [],
+    expected_stdout="blas.simple.stdout",
+    expected_stderr="blas.simple.stderr",
+    stdin='[{"n": 100}]',
+)
 
 # file
-test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.blas.json")],
-                              expected_stdout="blas.simple.stdout",
-                              expected_stderr="blas.simple.stderr",
-                              stdin='[{"n": 100}]')
+test_framework.compare_output(
+    ["-input", str(test_framework.sourcepath / "input.blas.json")],
+    expected_stdout="blas.simple.stdout",
+    expected_stderr="blas.simple.stderr",
+    stdin='[{"n": 100}]',
+)
 
 # profiler annotations
-test_framework.compare_output(["-input", '[{"n": 100}]', '-profile', '-profiler_hook', 'debug'],
-                              expected_stdout="blas.profile.stdout",
-                              expected_stderr="blas.profile.stderr",
-                              stdin='[{"n": 100}]')
+test_framework.compare_output(
+    ["-input", '[{"n": 100}]', "-profile", "-profiler_hook", "debug"],
+    expected_stdout="blas.profile.stdout",
+    expected_stderr="blas.profile.stderr",
+    stdin='[{"n": 100}]',
+)
diff --git a/benchmark/test/conversion.py b/benchmark/test/conversion.py
index 91e71cc9e89..cf2e33983af 100755
--- a/benchmark/test/conversion.py
+++ b/benchmark/test/conversion.py
@@ -1,28 +1,57 @@
 #!/usr/bin/env python3
 import test_framework
+
 # check that all input modes work:
 # parameter
-test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr"],
expected_stdout="conversion.simple.stdout", + expected_stderr="conversion.simple.stderr", +) # stdin -test_framework.compare_output(["-formats", "coo,csr"], - expected_stdout="conversion.simple.stdout", - expected_stderr="conversion.simple.stderr", - stdin='[{"size": 100, "stencil": "7pt"}]') +test_framework.compare_output( + ["-formats", "coo,csr"], + expected_stdout="conversion.simple.stdout", + expected_stderr="conversion.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) # input file -test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.mtx.json"), "-formats", "coo,csr"], - expected_stdout="conversion.simple.stdout", - expected_stderr="conversion.simple.stderr") +test_framework.compare_output( + [ + "-input", + str(test_framework.sourcepath / "input.mtx.json"), + "-formats", + "coo,csr", + ], + expected_stdout="conversion.simple.stdout", + expected_stderr="conversion.simple.stderr", +) # check that all conversions work -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr,ell,sellp,hybrid"], - expected_stdout="conversion.all.stdout", - expected_stderr="conversion.all.stderr") +test_framework.compare_output( + [ + "-input", + '[{"size": 100, "stencil": "7pt"}]', + "-formats", + "coo,csr,ell,sellp,hybrid", + ], + expected_stdout="conversion.all.stdout", + expected_stderr="conversion.all.stderr", +) # profiler annotations -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]', "-formats", "coo,csr", '-profile', '-profiler_hook', 'debug'], - expected_stdout="conversion.profile.stdout", - expected_stderr="conversion.profile.stderr") +test_framework.compare_output( + [ + "-input", + '[{"size": 100, "stencil": "7pt"}]', + "-formats", + "coo,csr", + "-profile", + "-profiler_hook", + "debug", + ], + expected_stdout="conversion.profile.stdout", + expected_stderr="conversion.profile.stderr", +) diff --git a/benchmark/test/matrix_statistics.py b/benchmark/test/matrix_statistics.py index 62547acfbeb..365cfe025dd 100755 --- a/benchmark/test/matrix_statistics.py +++ b/benchmark/test/matrix_statistics.py @@ -1,18 +1,25 @@ #!/usr/bin/env python3 import test_framework + # check that all input modes work: # parameter -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]'], - expected_stdout="matrix_statistics.simple.stdout", - expected_stderr="matrix_statistics.simple.stderr") +test_framework.compare_output( + ["-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="matrix_statistics.simple.stdout", + expected_stderr="matrix_statistics.simple.stderr", +) # stdin -test_framework.compare_output([], - expected_stdout="matrix_statistics.simple.stdout", - expected_stderr="matrix_statistics.simple.stderr", - stdin='[{"size": 100, "stencil": "7pt"}]') +test_framework.compare_output( + [], + expected_stdout="matrix_statistics.simple.stdout", + expected_stderr="matrix_statistics.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) # input file -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]'], - expected_stdout="matrix_statistics.simple.stdout", - expected_stderr="matrix_statistics.simple.stderr") +test_framework.compare_output( + ["-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="matrix_statistics.simple.stdout", + expected_stderr="matrix_statistics.simple.stderr", +) diff --git a/benchmark/test/multi_vector_distributed.py b/benchmark/test/multi_vector_distributed.py index 808a7c3e458..aab886ca509 100644 --- 
a/benchmark/test/multi_vector_distributed.py +++ b/benchmark/test/multi_vector_distributed.py @@ -1,29 +1,37 @@ #!/usr/bin/env python3 import test_framework + # check that all input modes work: # parameter -test_framework.compare_output_distributed(["-input", '[{"n": 100}]'], - expected_stdout="multi_vector_distributed.simple.stdout", - expected_stderr="multi_vector_distributed.simple.stderr", - num_procs=3) +test_framework.compare_output_distributed( + ["-input", '[{"n": 100}]'], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + num_procs=3, +) # stdin -test_framework.compare_output_distributed([], - expected_stdout="multi_vector_distributed.simple.stdout", - expected_stderr="multi_vector_distributed.simple.stderr", - stdin='[{"n": 100}]', - num_procs=3) +test_framework.compare_output_distributed( + [], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) # file -test_framework.compare_output_distributed(["-input", str(test_framework.sourcepath / "input.blas.json")], - expected_stdout="multi_vector_distributed.simple.stdout", - expected_stderr="multi_vector_distributed.simple.stderr", - stdin='[{"n": 100}]', - num_procs=3) +test_framework.compare_output_distributed( + ["-input", str(test_framework.sourcepath / "input.blas.json")], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) # profiler annotations -test_framework.compare_output_distributed(["-input", '[{"n": 100}]', '-profile', '-profiler_hook', 'debug'], - expected_stdout="multi_vector_distributed.profile.stdout", - expected_stderr="multi_vector_distributed.profile.stderr", - stdin='[{"n": 100}]', - num_procs=3) +# currently still unstable output and thus disabled +# test_framework.compare_output_distributed(["-input", '[{"n": 100}]', '-profile', '-profiler_hook', 'debug'], +# expected_stdout="multi_vector_distributed.profile.stdout", +# expected_stderr="multi_vector_distributed.profile.stderr", +# stdin='[{"n": 100}]', +# num_procs=3) diff --git a/benchmark/test/preconditioner.py b/benchmark/test/preconditioner.py index 4a044cd25f5..a5a8dd3f13f 100755 --- a/benchmark/test/preconditioner.py +++ b/benchmark/test/preconditioner.py @@ -1,23 +1,38 @@ #!/usr/bin/env python3 import test_framework + # check that all input modes work: # parameter -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]'], - expected_stdout="preconditioner.simple.stdout", - expected_stderr="preconditioner.simple.stderr") +test_framework.compare_output( + ["-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="preconditioner.simple.stdout", + expected_stderr="preconditioner.simple.stderr", +) # stdin -test_framework.compare_output([], - expected_stdout="preconditioner.simple.stdout", - expected_stderr="preconditioner.simple.stderr", - stdin='[{"size": 100, "stencil": "7pt"}]') +test_framework.compare_output( + [], + expected_stdout="preconditioner.simple.stdout", + expected_stderr="preconditioner.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) # input file -test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.mtx.json")], - expected_stdout="preconditioner.simple.stdout", - expected_stderr="preconditioner.simple.stderr") +test_framework.compare_output( + ["-input", str(test_framework.sourcepath / 
"input.mtx.json")], + expected_stdout="preconditioner.simple.stdout", + expected_stderr="preconditioner.simple.stderr", +) # profiler annotations -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], - expected_stdout="preconditioner.profile.stdout", - expected_stderr="preconditioner.profile.stderr") +test_framework.compare_output( + [ + "-input", + '[{"size": 100, "stencil": "7pt"}]', + "-profile", + "-profiler_hook", + "debug", + ], + expected_stdout="preconditioner.profile.stdout", + expected_stderr="preconditioner.profile.stderr", +) diff --git a/benchmark/test/solver.py b/benchmark/test/solver.py index fd8130e0ae1..e974f849276 100755 --- a/benchmark/test/solver.py +++ b/benchmark/test/solver.py @@ -1,23 +1,38 @@ #!/usr/bin/env python3 import test_framework + # check that all input modes work: # parameter -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]'], - expected_stdout="solver.simple.stdout", - expected_stderr="solver.simple.stderr") +test_framework.compare_output( + ["-input", '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]'], + expected_stdout="solver.simple.stdout", + expected_stderr="solver.simple.stderr", +) # stdin -test_framework.compare_output([], - expected_stdout="solver.simple.stdout", - expected_stderr="solver.simple.stderr", - stdin='[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]') +test_framework.compare_output( + [], + expected_stdout="solver.simple.stdout", + expected_stderr="solver.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]', +) # input file -test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.solver.json")], - expected_stdout="solver.simple.stdout", - expected_stderr="solver.simple.stderr") +test_framework.compare_output( + ["-input", str(test_framework.sourcepath / "input.solver.json")], + expected_stdout="solver.simple.stdout", + expected_stderr="solver.simple.stderr", +) # profiler annotations -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]', '-profile', '-profiler_hook', 'debug'], - expected_stdout="solver.profile.stdout", - expected_stderr="solver.profile.stderr") +test_framework.compare_output( + [ + "-input", + '[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]', + "-profile", + "-profiler_hook", + "debug", + ], + expected_stdout="solver.profile.stdout", + expected_stderr="solver.profile.stderr", +) diff --git a/benchmark/test/solver_distributed.py b/benchmark/test/solver_distributed.py index f8a02861e26..c6623723a43 100644 --- a/benchmark/test/solver_distributed.py +++ b/benchmark/test/solver_distributed.py @@ -1,23 +1,41 @@ #!/usr/bin/env python3 import test_framework + # check that all input modes work: # parameter -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]'], - expected_stdout="distributed_solver.simple.stdout", - expected_stderr="distributed_solver.simple.stderr") +test_framework.compare_output( + [ + "-input", + '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]', + ], + expected_stdout="distributed_solver.simple.stdout", + expected_stderr="distributed_solver.simple.stderr", +) # stdin -test_framework.compare_output([], - expected_stdout="distributed_solver.simple.stdout", - expected_stderr="distributed_solver.simple.stderr", - 
stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]') +test_framework.compare_output( + [], + expected_stdout="distributed_solver.simple.stdout", + expected_stderr="distributed_solver.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]', +) # input file -test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.distributed_solver.json")], - expected_stdout="distributed_solver.simple.stdout", - expected_stderr="distributed_solver.simple.stderr") +test_framework.compare_output( + ["-input", str(test_framework.sourcepath / "input.distributed_solver.json")], + expected_stdout="distributed_solver.simple.stdout", + expected_stderr="distributed_solver.simple.stderr", +) # profiler annotations -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]', '-profile', '-profiler_hook', 'debug'], - expected_stdout="distributed_solver.profile.stdout", - expected_stderr="distributed_solver.profile.stderr") +test_framework.compare_output( + [ + "-input", + '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": {"spmv": "csr-csr"}}]', + "-profile", + "-profiler_hook", + "debug", + ], + expected_stdout="distributed_solver.profile.stdout", + expected_stderr="distributed_solver.profile.stderr", +) diff --git a/benchmark/test/sparse_blas.py b/benchmark/test/sparse_blas.py index 913aac94d07..7b0968a710c 100755 --- a/benchmark/test/sparse_blas.py +++ b/benchmark/test/sparse_blas.py @@ -1,23 +1,45 @@ #!/usr/bin/env python3 import test_framework + # check that all input modes work: # parameter -test_framework.compare_output(["-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]'], - expected_stdout="sparse_blas.simple.stdout", - expected_stderr="sparse_blas.simple.stderr") +test_framework.compare_output( + ["-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="sparse_blas.simple.stdout", + expected_stderr="sparse_blas.simple.stderr", +) # stdin -test_framework.compare_output(["-operations", "transpose"], - expected_stdout="sparse_blas.simple.stdout", - expected_stderr="sparse_blas.simple.stderr", - stdin='[{"size": 100, "stencil": "7pt"}]') +test_framework.compare_output( + ["-operations", "transpose"], + expected_stdout="sparse_blas.simple.stdout", + expected_stderr="sparse_blas.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) # input file -test_framework.compare_output(["-operations", "transpose", "-input", str(test_framework.sourcepath / "input.mtx.json")], - expected_stdout="sparse_blas.simple.stdout", - expected_stderr="sparse_blas.simple.stderr") +test_framework.compare_output( + [ + "-operations", + "transpose", + "-input", + str(test_framework.sourcepath / "input.mtx.json"), + ], + expected_stdout="sparse_blas.simple.stdout", + expected_stderr="sparse_blas.simple.stderr", +) # profiler annotations (transpose has the smallest number of allocations) -test_framework.compare_output(["-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], - expected_stdout="sparse_blas.profile.stdout", - expected_stderr="sparse_blas.profile.stderr") +test_framework.compare_output( + [ + "-operations", + "transpose", + "-input", + '[{"size": 100, "stencil": "7pt"}]', + "-profile", + "-profiler_hook", + "debug", + ], + expected_stdout="sparse_blas.profile.stdout", + 
expected_stderr="sparse_blas.profile.stderr", +) diff --git a/benchmark/test/spmv.py b/benchmark/test/spmv.py index d3f3015b9dd..6e2d9f05d49 100755 --- a/benchmark/test/spmv.py +++ b/benchmark/test/spmv.py @@ -1,23 +1,38 @@ #!/usr/bin/env python3 import test_framework + # check that all input modes work: # parameter -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]'], - expected_stdout="spmv.simple.stdout", - expected_stderr="spmv.simple.stderr") +test_framework.compare_output( + ["-input", '[{"size": 100, "stencil": "7pt"}]'], + expected_stdout="spmv.simple.stdout", + expected_stderr="spmv.simple.stderr", +) # stdin -test_framework.compare_output([], - expected_stdout="spmv.simple.stdout", - expected_stderr="spmv.simple.stderr", - stdin='[{"size": 100, "stencil": "7pt"}]') +test_framework.compare_output( + [], + expected_stdout="spmv.simple.stdout", + expected_stderr="spmv.simple.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) # input file -test_framework.compare_output(["-input", str(test_framework.sourcepath / "input.mtx.json")], - expected_stdout="spmv.simple.stdout", - expected_stderr="spmv.simple.stderr") +test_framework.compare_output( + ["-input", str(test_framework.sourcepath / "input.mtx.json")], + expected_stdout="spmv.simple.stdout", + expected_stderr="spmv.simple.stderr", +) # profiler annotations -test_framework.compare_output(["-input", '[{"size": 100, "stencil": "7pt"}]', '-profile', '-profiler_hook', 'debug'], - expected_stdout="spmv.profile.stdout", - expected_stderr="spmv.profile.stderr") +test_framework.compare_output( + [ + "-input", + '[{"size": 100, "stencil": "7pt"}]', + "-profile", + "-profiler_hook", + "debug", + ], + expected_stdout="spmv.profile.stdout", + expected_stderr="spmv.profile.stderr", +) diff --git a/benchmark/test/spmv_distributed.py b/benchmark/test/spmv_distributed.py index f6aa1accbe9..1b219b34cda 100644 --- a/benchmark/test/spmv_distributed.py +++ b/benchmark/test/spmv_distributed.py @@ -1,27 +1,35 @@ #!/usr/bin/env python3 import test_framework + # check that all input modes work: # parameter -test_framework.compare_output_distributed(["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]'], - expected_stdout="spmv_distributed.simple.stdout", - expected_stderr="spmv_distributed.simple.stderr", - num_procs=3) +test_framework.compare_output_distributed( + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]'], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, +) # stdin -test_framework.compare_output_distributed([], - expected_stdout="spmv_distributed.simple.stdout", - expected_stderr="spmv_distributed.simple.stderr", - num_procs=3, - stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]') +test_framework.compare_output_distributed( + [], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, + stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', +) # input file -test_framework.compare_output_distributed(["-input", str(test_framework.sourcepath / "input.distributed_mtx.json")], - expected_stdout="spmv_distributed.simple.stdout", - expected_stderr="spmv_distributed.simple.stderr", - num_procs=3) +test_framework.compare_output_distributed( + ["-input", str(test_framework.sourcepath / "input.distributed_mtx.json")], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", 
+ num_procs=3, +) # profiler annotations -test_framework.compare_output_distributed(["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', '-profile', '-profiler_hook', 'debug'], - expected_stdout="spmv_distributed.profile.stdout", - expected_stderr="spmv_distributed.profile.stderr", - num_procs=3) +# currently still unstable output and thus disabled +# test_framework.compare_output_distributed(["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', '-profile', '-profiler_hook', 'debug'], +# expected_stdout="spmv_distributed.profile.stdout", +# expected_stderr="spmv_distributed.profile.stderr", +# num_procs=3) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 2d22f11ac4f..fff93548ad6 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -6,6 +6,7 @@ import typing import re import pathlib import sys + sourcepath = pathlib.Path("@CMAKE_CURRENT_SOURCE_DIR@") binpath = pathlib.Path("@PROJECT_BINARY_DIR@") generate = False @@ -13,11 +14,22 @@ if len(sys.argv) > 2 and sys.argv[2] == "--generate": generate = True -denumberify_paths = ["time", "bandwidth", "flops", "components", - "residual_norm", "rhs_norm", "max_relative_norm2"] +denumberify_paths = [ + "time", + "bandwidth", + "flops", + "components", + "residual_norm", + "rhs_norm", + "max_relative_norm2", +] empty_string_paths = ["error"] -empty_array_paths = ["recurrent_residuals", "true_residuals", - "implicit_residuals", "iteration_timestamps"] +empty_array_paths = [ + "recurrent_residuals", + "true_residuals", + "implicit_residuals", + "iteration_timestamps", +] def sanitize_json_single(key, value, sanitize_all): @@ -34,7 +46,10 @@ def sanitize_json_single(key, value, sanitize_all): def sanitize_json(parsed_input, sanitize_all=False): if isinstance(parsed_input, typing.Dict): - return {key: sanitize_json_single(key, value, sanitize_all) for key, value in parsed_input.items()} + return { + key: sanitize_json_single(key, value, sanitize_all) + for key, value in parsed_input.items() + } elif isinstance(parsed_input, typing.List): return [sanitize_json(e, sanitize_all) for e in parsed_input] elif sanitize_all and isinstance(parsed_input, float): @@ -47,21 +62,36 @@ def sanitize_text(lines): json_begins = [i for i, l in enumerate(lines) if l in ["[", "{"]] json_ends = [i + 1 for i, l in enumerate(lines) if l in ["]", "}"]] json_pairs = list(zip(json_begins, json_ends)) - if (len(json_pairs) == 0): + if len(json_pairs) == 0: return lines - assert (all(begin < end for begin, end in json_pairs)) - nonjson_pairs = [(0, json_begins[0])] + list(zip(json_ends[:-1], - json_begins[1:])) + [(json_ends[-1], len(lines))] - combined_pairs = sorted([(begin, end, False) for begin, end in nonjson_pairs] + [ - (begin, end, True) for begin, end in json_pairs]) - texts = [("\n".join(lines[begin:end]), do_sanitize) - for begin, end, do_sanitize in combined_pairs] - reconstructed = [json.dumps(sanitize_json(json.loads( - t)), indent=4) if do_sanitize else t for t, do_sanitize in texts] + assert all(begin < end for begin, end in json_pairs) + nonjson_pairs = ( + [(0, json_begins[0])] + + list(zip(json_ends[:-1], json_begins[1:])) + + [(json_ends[-1], len(lines))] + ) + combined_pairs = sorted( + [(begin, end, False) for begin, end in nonjson_pairs] + + [(begin, end, True) for begin, end in json_pairs] + ) + texts = [ + ("\n".join(lines[begin:end]), do_sanitize) + for begin, end, do_sanitize in combined_pairs + ] + reconstructed = [ + 
json.dumps(sanitize_json(json.loads(t)), indent=4) if do_sanitize else t + for t, do_sanitize in texts + ] return "\n".join(reconstructed).split("\n") -def determinize_text(input, denumberify_paths=[], remove_paths=[], ignore_patterns=[], replace_patterns=[]): +def determinize_text( + input, + denumberify_paths=[], + remove_paths=[], + ignore_patterns=[], + replace_patterns=[], +): lines = input.splitlines() output_lines = [] patterns = [re.compile(pattern) for pattern in ignore_patterns] @@ -87,48 +117,88 @@ def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_fl args = [sys.argv[1]] + args expected_stdout = str(sourcepath / "reference" / expected_stdout) expected_stderr = str(sourcepath / "reference" / expected_stderr) - result = subprocess.run(args=launcher_flags + args, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, input=bytes(stdin, "utf-8")) - print("TEST: {}".format( - " ".join(["'{}'".format(arg) for arg in launcher_flags + args]))) + result = subprocess.run( + args=launcher_flags + args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + input=bytes(stdin, "utf-8"), + ) + print( + "TEST: {}".format( + " ".join(["'{}'".format(arg) for arg in launcher_flags + args]) + ) + ) version_patterns = [ " the .* module is", ] typename_patterns = [ ("(apply|generate|check|copy|move)\([^())]*\)", "\\1()"), - ("what\\(\\): .*", "what(): ") + ("what\\(\\): .*", "what(): "), ] if generate: - open(expected_stdout, "w").write("\n".join(determinize_text( - result.stdout.decode(), replace_patterns=typename_patterns))) - open(expected_stderr, "w").write("\n".join(determinize_text(result.stderr.decode( - ), ignore_patterns=version_patterns, replace_patterns=typename_patterns))) + open(expected_stdout, "w").write( + "\n".join( + determinize_text( + result.stdout.decode(), replace_patterns=typename_patterns + ) + ) + ) + open(expected_stderr, "w").write( + "\n".join( + determinize_text( + result.stderr.decode(), + ignore_patterns=version_patterns, + replace_patterns=typename_patterns, + ) + ) + ) print("GENERATED") return result_stdout_processed = determinize_text( - result.stdout.decode(), replace_patterns=typename_patterns) - result_stderr_processed = determinize_text(result.stderr.decode( - ), ignore_patterns=version_patterns, replace_patterns=typename_patterns) + result.stdout.decode(), replace_patterns=typename_patterns + ) + result_stderr_processed = determinize_text( + result.stderr.decode(), + ignore_patterns=version_patterns, + replace_patterns=typename_patterns, + ) expected_stdout_processed = determinize_text( - open(expected_stdout).read(), replace_patterns=typename_patterns) - expected_stderr_processed = determinize_text(open(expected_stderr).read( - ), ignore_patterns=version_patterns, replace_patterns=typename_patterns) + open(expected_stdout).read(), replace_patterns=typename_patterns + ) + expected_stderr_processed = determinize_text( + open(expected_stderr).read(), + ignore_patterns=version_patterns, + replace_patterns=typename_patterns, + ) failed = False if result_stdout_processed != expected_stdout_processed: print("FAIL: stdout differs") - print("\n".join(difflib.unified_diff( - expected_stdout_processed, result_stdout_processed))) + print( + "\n".join( + difflib.unified_diff(expected_stdout_processed, result_stdout_processed) + ) + ) failed = True if result_stderr_processed != expected_stderr_processed: print("FAIL: stderr differs") - print("\n".join(difflib.unified_diff( - expected_stderr_processed, result_stderr_processed))) + print( + "\n".join( + 
difflib.unified_diff(expected_stderr_processed, result_stderr_processed) + ) + ) failed = True if failed: exit(1) print("PASS") -def compare_output_distributed(args, expected_stdout, expected_stderr, num_procs, stdin=""): - compare_output(args, expected_stdout, expected_stderr, stdin, [ - "@MPIEXEC_EXECUTABLE@", "@MPIEXEC_NUMPROC_FLAG@", str(num_procs)]) +def compare_output_distributed( + args, expected_stdout, expected_stderr, num_procs, stdin="" +): + compare_output( + args, + expected_stdout, + expected_stderr, + stdin, + ["@MPIEXEC_EXECUTABLE@", "@MPIEXEC_NUMPROC_FLAG@", str(num_procs)], + ) From bd7f565b30d8d10d3f27589af6ae7ac4ac4fffdb Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 22 May 2023 15:10:03 +0200 Subject: [PATCH 037/583] disable unstable tests --- benchmark/test/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt index 1cd589927fa..0a2be0e662c 100644 --- a/benchmark/test/CMakeLists.txt +++ b/benchmark/test/CMakeLists.txt @@ -22,7 +22,8 @@ add_benchmark_test(solver) add_benchmark_test(sparse_blas) add_benchmark_test(spmv) if (GINKGO_BUILD_MPI) - add_benchmark_test(multi_vector_distributed) - add_benchmark_test(spmv_distributed) + # the distributed tests are still failing due to unstable output + #add_benchmark_test(multi_vector_distributed) + #add_benchmark_test(spmv_distributed) add_benchmark_test(solver_distributed) endif() \ No newline at end of file From 82d567fb88562f458cb35b1f0e9ddaae3a20aca8 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 23 May 2023 10:52:20 +0200 Subject: [PATCH 038/583] move SYCL_DEVICE_FILTER by ONEAPI_DEVICE_SELECTOR --- .gitlab-ci.yml | 10 +++++----- .gitlab/scripts.yml | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 85683fc100c..d899ff00ad0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -615,7 +615,7 @@ build/dpcpp/2022-1/cpu/release/static: BUILD_DPCPP: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "ON" - SYCL_DEVICE_FILTER: "CPU" + ONEAPI_DEVICE_SELECTOR: "*:cpu" SLURM_PARTITION: "cpu" SLURM_TIME: "2:00:00" # This job is not in exclusive mode @@ -634,7 +634,7 @@ build/dpcpp/igpu/release/shared: BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "ON" DPCPP_SINGLE_MODE: "ON" - SYCL_DEVICE_FILTER: "GPU" + ONEAPI_DEVICE_SELECTOR: "*:gpu" # TODO: Enable when debug shared library size issues are fixed # build/dpcpp/level_zero_igpu/debug/shared: @@ -650,7 +650,7 @@ build/dpcpp/igpu/release/shared: # BUILD_TYPE: "Debug" # BUILD_SHARED_LIBS: "ON" # DPCPP_SINGLE_MODE: "ON" -# SYCL_DEVICE_FILTER: "Level_Zero:GPU" +# ONEAPI_DEVICE_SELECTOR: "level_zero:gpu" # It gives two available backends of GPU on tests build/dpcpp/dgpu/release/static: @@ -666,7 +666,7 @@ build/dpcpp/dgpu/release/static: BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OF" DPCPP_SINGLE_MODE: "ON" - SYCL_DEVICE_FILTER: "GPU" + ONEAPI_DEVICE_SELECTOR: "*:gpu" build/dpcpp/level_zero_dgpu/release/shared: extends: @@ -680,7 +680,7 @@ build/dpcpp/level_zero_dgpu/release/shared: BUILD_DPCPP: "ON" BUILD_TYPE: "Release" DPCPP_SINGLE_MODE: "ON" - SYCL_DEVICE_FILTER: "Level_Zero:GPU" + ONEAPI_DEVICE_SELECTOR: "level_zero:gpu" # Job with important warnings as error warnings: diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index 537f2e5e83e..7b1c30c27c0 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -68,6 +68,7 @@ fi - if [ -n "${SYCL_DEVICE_TYPE}" ]; then export SYCL_DEVICE_TYPE; fi - if [ -n "${SYCL_DEVICE_FILTER}" 
]; then export SYCL_DEVICE_FILTER; fi + - if [ -n "${ONEAPI_DEVICE_SELECTOR}" ]; then export ONEAPI_DEVICE_SELECTOR; fi - if [[ "${MPI_AS_ROOT}" == "ON" ]];then export OMPI_ALLOW_RUN_AS_ROOT=1; export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1; From 2241e7ea7e00bfd8a21f4d8860e84cecaffb7ea1 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 4 Jun 2023 16:16:20 +0200 Subject: [PATCH 039/583] update benchmark outputs - no more -detailed information in the output - moved the range annotation closer to the hot loop --- .../reference/preconditioner.profile.stderr | 28 +- .../reference/preconditioner.profile.stdout | 12 +- .../test/reference/solver.profile.stderr | 890 ------------------ .../test/reference/solver.profile.stdout | 27 +- .../test/reference/sparse_blas.profile.stderr | 27 +- .../test/reference/sparse_blas.profile.stdout | 7 - benchmark/test/reference/spmv.profile.stderr | 65 -- benchmark/test/reference/spmv.profile.stdout | 1 - 8 files changed, 8 insertions(+), 1049 deletions(-) diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index 2bebc03be8d..bd8628be212 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -77,22 +77,6 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: begin free DEBUG: end free -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin generate() -DEBUG: end generate() -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin free -DEBUG: end free DEBUG: end none Current state: [ @@ -102,20 +86,12 @@ Current state: "preconditioner": { "none": { "generate": { - "components": { - "generate()": 1.0, - "overhead": 1.0 - }, + "components": {}, "time": 1.0, "repetitions": 1 }, "apply": { - "components": { - "apply()": 1.0, - "copy()": 1.0, - "dense::copy": 1.0, - "overhead": 1.0 - }, + "components": {}, "time": 1.0, "repetitions": 1 }, diff --git a/benchmark/test/reference/preconditioner.profile.stdout b/benchmark/test/reference/preconditioner.profile.stdout index ba967989af4..cc73c4c4552 100644 --- a/benchmark/test/reference/preconditioner.profile.stdout +++ b/benchmark/test/reference/preconditioner.profile.stdout @@ -6,20 +6,12 @@ "preconditioner": { "none": { "generate": { - "components": { - "generate()": 1.0, - "overhead": 1.0 - }, + "components": {}, "time": 1.0, "repetitions": 1 }, "apply": { - "components": { - "apply()": 1.0, - "copy()": 1.0, - "dense::copy": 1.0, - "overhead": 1.0 - }, + "components": {}, "time": 1.0, "repetitions": 1 }, diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index a601444163d..3d9b9a3ad10 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -79,874 +79,6 @@ DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() -DEBUG: begin generate() -DEBUG: begin generate() -DEBUG: end generate() -DEBUG: end generate() -DEBUG: begin copy() -DEBUG: end copy() -DEBUG: begin apply() -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin 
allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin cg::initialize -DEBUG: end cg::initialize -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin 
dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: 
end check() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end iteration -DEBUG: end apply() -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin free -DEBUG: end free -DEBUG: begin apply() -DEBUG: begin iteration -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin cg::initialize -DEBUG: end cg::initialize -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin 
allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: 
end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() 
-DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: end check() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate 
-DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end iteration -DEBUG: end apply() -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate @@ -959,28 +91,6 @@ DEBUG: begin generate() DEBUG: begin generate() DEBUG: end generate() DEBUG: end generate() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin apply() DEBUG: begin iteration DEBUG: begin allocate diff --git a/benchmark/test/reference/solver.profile.stdout b/benchmark/test/reference/solver.profile.stdout index f66daea1f30..128a8a1f169 100644 --- a/benchmark/test/reference/solver.profile.stdout +++ b/benchmark/test/reference/solver.profile.stdout @@ -14,37 +14,14 @@ "iteration_timestamps": [], "rhs_norm": 1.0, "generate": { - "components": { - "generate()": 1.0, - "overhead": 1.0 - }, + "components": {}, "time": 1.0 }, "apply": { - "components": { - "apply()": 1.0, - "iteration": 1.0, - "allocate": 1.0, - "dense::fill": 1.0, - "cg::initialize": 1.0, - "advanced_apply()": 1.0, - "csr::advanced_spmv": 1.0, - "dense::compute_norm2_dispatch": 1.0, - "copy()": 1.0, - "dense::copy": 1.0, - "dense::compute_conj_dot_dispatch": 1.0, - "check()": 1.0, - "residual_norm::residual_norm": 1.0, - "cg::step_1": 1.0, - "csr::spmv": 1.0, - "cg::step_2": 1.0, - "free": 1.0, - "overhead": 1.0 - }, + "components": {}, "iterations": 7, "time": 1.0 }, - "preconditioner": {}, "residual_norm": 1.0, "repetitions": 1, "completed": true diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index 3dee884861e..66c67cf84ea 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -9,7 +9,6 @@ The operations are transposeRunning test case "stencil": "7pt", "sparse_blas": {} } -DEBUG: begin stencil(100,7pt) Matrix is of size (125, 125), 725 DEBUG: begin allocate DEBUG: end allocate @@ -35,6 +34,7 @@ DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin free DEBUG: end free +DEBUG: begin stencil(100,7pt) DEBUG: begin transpose DEBUG: begin allocate DEBUG: end allocate @@ -46,22 +46,6 @@ DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin csr::transpose DEBUG: end csr::transpose -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin csr::transpose -DEBUG: end csr::transpose -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free 
-DEBUG: begin free -DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -80,13 +64,6 @@ Current state: "flops": 1.0, "bandwidth": 1.0, "repetitions": 1, - "components": { - "allocate": 1.0, - "components::fill_array": 1.0, - "csr::transpose": 1.0, - "free": 1.0, - "overhead": 1.0 - }, "completed": true } }, @@ -95,10 +72,10 @@ Current state: "nonzeros": 725 } ] +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/sparse_blas.profile.stdout b/benchmark/test/reference/sparse_blas.profile.stdout index ba92c30298a..848fb503ed4 100644 --- a/benchmark/test/reference/sparse_blas.profile.stdout +++ b/benchmark/test/reference/sparse_blas.profile.stdout @@ -9,13 +9,6 @@ "flops": 1.0, "bandwidth": 1.0, "repetitions": 1, - "components": { - "allocate": 1.0, - "components::fill_array": 1.0, - "csr::transpose": 1.0, - "free": 1.0, - "overhead": 1.0 - }, "completed": true } }, diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 735e4bf5d23..5a12a077bc5 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -59,24 +59,6 @@ DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa -DEBUG: begin apply() -DEBUG: begin coo::spmv -DEBUG: end coo::spmv -DEBUG: end apply() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin coo DEBUG: begin allocate DEBUG: end allocate @@ -96,52 +78,6 @@ DEBUG: begin apply() DEBUG: begin coo::spmv DEBUG: end coo::spmv DEBUG: end apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::add_scaled -DEBUG: end dense::add_scaled -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_norm2_dispatch -DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin apply() -DEBUG: begin coo::spmv -DEBUG: end coo::spmv -DEBUG: end apply() DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -159,7 +95,6 @@ Current state: "spmv": { "coo": { "storage": 11600, - "max_relative_norm2": 1.0, "time": 1.0, "repetitions": 1, "completed": true diff --git a/benchmark/test/reference/spmv.profile.stdout b/benchmark/test/reference/spmv.profile.stdout index ec7309613b6..dacc490ddf0 100644 --- a/benchmark/test/reference/spmv.profile.stdout +++ b/benchmark/test/reference/spmv.profile.stdout @@ -6,7 +6,6 @@ "spmv": { "coo": { "storage": 11600, - "max_relative_norm2": 1.0, 
"time": 1.0, "repetitions": 1, "completed": true From c5d24893b6a446fe69e670a738d24ea577233a65 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 5 Jun 2023 10:23:43 +0200 Subject: [PATCH 040/583] update distributed benchmark outputs --- .../distributed_solver.profile.stderr | 1150 ----------------- .../distributed_solver.profile.stdout | 29 +- 2 files changed, 2 insertions(+), 1177 deletions(-) diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index e0ddd10ab54..718240f5a38 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -232,1134 +232,6 @@ DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() -DEBUG: begin generate() -DEBUG: begin generate() -DEBUG: end generate() -DEBUG: end generate() -DEBUG: begin copy() -DEBUG: end copy() -DEBUG: begin apply() -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin cg::initialize -DEBUG: end cg::initialize -DEBUG: begin advanced_apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end 
dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: 
begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin 
dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: end check() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end iteration -DEBUG: end apply() -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin free -DEBUG: end free -DEBUG: begin apply() -DEBUG: begin iteration -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin cg::initialize -DEBUG: end cg::initialize -DEBUG: begin advanced_apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end 
csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin 
copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin 
dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end 
dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: begin check() -DEBUG: end check() -DEBUG: end check() -DEBUG: end iteration -DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy 
-DEBUG: end copy -DEBUG: begin cg::step_1 -DEBUG: end cg::step_1 -DEBUG: begin apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin apply() -DEBUG: begin csr::spmv -DEBUG: end csr::spmv -DEBUG: end apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin cg::step_2 -DEBUG: end cg::step_2 -DEBUG: begin apply() -DEBUG: begin copy() -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: end apply() -DEBUG: begin dense::compute_conj_dot_dispatch -DEBUG: end dense::compute_conj_dot_dispatch -DEBUG: begin check() -DEBUG: begin check() -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin residual_norm::residual_norm -DEBUG: end residual_norm::residual_norm -DEBUG: end check() -DEBUG: end check() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin advanced_apply() -DEBUG: begin dense::row_gather -DEBUG: end dense::row_gather -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: begin advanced_apply() -DEBUG: begin csr::advanced_spmv -DEBUG: end csr::advanced_spmv -DEBUG: end advanced_apply() -DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::compute_squared_norm2 -DEBUG: end dense::compute_squared_norm2 -DEBUG: begin dense::compute_sqrt -DEBUG: end dense::compute_sqrt -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end iteration -DEBUG: end apply() -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate @@ -1372,28 +244,6 @@ DEBUG: begin generate() DEBUG: begin generate() DEBUG: end generate() DEBUG: end generate() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin apply() DEBUG: begin iteration DEBUG: begin allocate diff --git a/benchmark/test/reference/distributed_solver.profile.stdout b/benchmark/test/reference/distributed_solver.profile.stdout index a31b88ff582..c61541a5d5b 100644 --- 
a/benchmark/test/reference/distributed_solver.profile.stdout +++ b/benchmark/test/reference/distributed_solver.profile.stdout @@ -15,39 +15,14 @@ "iteration_timestamps": [], "rhs_norm": 1.0, "generate": { - "components": { - "generate()": 1.0, - "overhead": 1.0 - }, + "components": {}, "time": 1.0 }, "apply": { - "components": { - "apply()": 1.0, - "iteration": 1.0, - "allocate": 1.0, - "dense::fill": 1.0, - "cg::initialize": 1.0, - "advanced_apply()": 1.0, - "dense::row_gather": 1.0, - "csr::advanced_spmv": 1.0, - "dense::compute_squared_norm2": 1.0, - "dense::compute_sqrt": 1.0, - "copy()": 1.0, - "dense::copy": 1.0, - "dense::compute_conj_dot_dispatch": 1.0, - "check()": 1.0, - "residual_norm::residual_norm": 1.0, - "cg::step_1": 1.0, - "csr::spmv": 1.0, - "cg::step_2": 1.0, - "free": 1.0, - "overhead": 1.0 - }, + "components": {}, "iterations": 7, "time": 1.0 }, - "preconditioner": {}, "residual_norm": 1.0, "repetitions": 1, "completed": true From edf3c6b6a3b2f4b19480b7ef80c2260accca8a13 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 6 Jun 2023 10:59:22 +0200 Subject: [PATCH 041/583] Replace deprecated SYCL_DEVICE_FILTER --- .github/workflows/intel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index e612c72b7e7..0d8acd52a34 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -37,7 +37,7 @@ jobs: cd build cmake .. -DCMAKE_INSTALL_PREFIX=install_ginkgo -DCMAKE_CXX_COMPILER=dpcpp -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }} -DGINKGO_DPCPP_SINGLE_MODE=ON make -j8 - SYCL_DEVICE_FILTER=level_zero ctest -j10 --output-on-failure + ONEAPI_DEVICE_SELECTOR=level_zero:gpu ctest -j10 --output-on-failure - name: install run: | From d194cb01cf73ed341e15aa11c6844a42a0b7727a Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 20 Jun 2023 14:30:08 +0200 Subject: [PATCH 042/583] improve documentation and function naming Co-authored-by: Gregor Olenik --- benchmark/test/test_framework.py.in | 86 ++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 20 deletions(-) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index fff93548ad6..09986fabdf1 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -12,8 +12,6 @@ binpath = pathlib.Path("@PROJECT_BINARY_DIR@") generate = False if len(sys.argv) > 2 and sys.argv[2] == "--generate": generate = True - - denumberify_paths = [ "time", "bandwidth", @@ -32,25 +30,35 @@ empty_array_paths = [ ] -def sanitize_json_single(key, value, sanitize_all): +def sanitize_json_key_value(key: str, value, sanitize_all: bool): + """Applies sanitation to a single key-value pair. 
+ + Strings with a key in empty_string_paths will be emptied + Numbers with a key in denumberify_paths will be set to 1.0 + + """ if key in empty_string_paths and isinstance(value, str): return "" if key in denumberify_paths and isinstance(value, float): return 1.0 - if key in denumberify_paths and isinstance(value, typing.Dict): + if key in denumberify_paths and isinstance(value, dict): return sanitize_json(value, True) - if key in empty_array_paths and isinstance(value, typing.List): + if key in empty_array_paths and isinstance(value, list): return [] return sanitize_json(value, sanitize_all) def sanitize_json(parsed_input, sanitize_all=False): - if isinstance(parsed_input, typing.Dict): + """Removes non-deterministic parts of a parsed JSON input. + + If sanitize_all is set to True, all nested float values will be set to 0. + Otherwise, the sanitation""" + if isinstance(parsed_input, dict): return { - key: sanitize_json_single(key, value, sanitize_all) + key: sanitize_json_key_value(key, value, sanitize_all) for key, value in parsed_input.items() } - elif isinstance(parsed_input, typing.List): + elif isinstance(parsed_input, list): return [sanitize_json(e, sanitize_all) for e in parsed_input] elif sanitize_all and isinstance(parsed_input, float): return 1.0 @@ -58,7 +66,15 @@ def sanitize_json(parsed_input, sanitize_all=False): return parsed_input -def sanitize_text(lines): +def sanitize_json_in_text(lines: list[str]) -> list[str]: + """Sanitizes all occurrences of JSON content inside text input. + + Takes a list of text lines and detects any pretty-printed JSON output inside + (recognized by a single [, {, } or ] in an otherwise empty line). + The JSON output will be parsed and sanitized through sanitize_json(...) + and pretty-printed to replace the original JSON input. + The function returns the resulting output""" + json_begins = [i for i, l in enumerate(lines) if l in ["[", "{"]] json_ends = [i + 1 for i, l in enumerate(lines) if l in ["]", "}"]] json_pairs = list(zip(json_begins, json_ends)) @@ -86,12 +102,20 @@ def sanitize_text(lines): def determinize_text( - input, - denumberify_paths=[], - remove_paths=[], - ignore_patterns=[], - replace_patterns=[], -): + input: str, + ignore_patterns: list[str], + replace_patterns: list[(str, str)], +) -> list[str]: + """Sanitizes the given input string. + + Every input line matching an entry from ignore_patterns will be removed. + Every line matching the first string in an entry from replace_patterns + will be replaced by the second string. + Finally, the text will be passed to sanitize_json_in_text, which removes + nondeterministic parts from JSON objects/arrays in the input, + if it can be parsed correctly. + The output is guaranteed to end with an empty line. 
+ """ lines = input.splitlines() output_lines = [] patterns = [re.compile(pattern) for pattern in ignore_patterns] @@ -108,12 +132,18 @@ def determinize_text( if output_lines[-1] != "": output_lines.append("") try: - return sanitize_text(output_lines) + return sanitize_json_in_text(output_lines) except json.decoder.JSONDecodeError: return output_lines -def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_flags=[]): +def compare_output_impl( + args: list[str], + expected_stdout: str, + expected_stderr: str, + stdin: str, + launcher_flags: list[str], +): args = [sys.argv[1]] + args expected_stdout = str(sourcepath / "reference" / expected_stdout) expected_stderr = str(sourcepath / "reference" / expected_stderr) @@ -139,7 +169,9 @@ def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_fl open(expected_stdout, "w").write( "\n".join( determinize_text( - result.stdout.decode(), replace_patterns=typename_patterns + result.stdout.decode(), + ignore_patterns=[], + replace_patterns=typename_patterns, ) ) ) @@ -155,7 +187,7 @@ def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_fl print("GENERATED") return result_stdout_processed = determinize_text( - result.stdout.decode(), replace_patterns=typename_patterns + result.stdout.decode(), ignore_patterns=[], replace_patterns=typename_patterns ) result_stderr_processed = determinize_text( result.stderr.decode(), @@ -163,7 +195,9 @@ def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_fl replace_patterns=typename_patterns, ) expected_stdout_processed = determinize_text( - open(expected_stdout).read(), replace_patterns=typename_patterns + open(expected_stdout).read(), + ignore_patterns=[], + replace_patterns=typename_patterns, ) expected_stderr_processed = determinize_text( open(expected_stderr).read(), @@ -192,6 +226,18 @@ def compare_output(args, expected_stdout, expected_stderr, stdin="", launcher_fl print("PASS") +def compare_output( + args: list[str], expected_stdout: str, expected_stderr: str, stdin: str = "" +): + compare_output_impl( + args, + expected_stdout=expected_stdout, + expected_stderr=expected_stderr, + stdin=stdin, + launcher_flags=[], + ) + + def compare_output_distributed( args, expected_stdout, expected_stderr, num_procs, stdin="" ): From 5047d143622facb66c983ef8bb9356ba73001324 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 21 Jun 2023 12:20:48 +0200 Subject: [PATCH 043/583] update version --- benchmark/test/reference/blas.profile.stderr | 4 ++-- benchmark/test/reference/blas.simple.stderr | 4 ++-- benchmark/test/reference/conversion.all.stderr | 4 ++-- benchmark/test/reference/conversion.profile.stderr | 4 ++-- benchmark/test/reference/conversion.simple.stderr | 4 ++-- benchmark/test/reference/distributed_solver.profile.stderr | 4 ++-- benchmark/test/reference/distributed_solver.simple.stderr | 4 ++-- benchmark/test/reference/matrix_statistics.simple.stderr | 4 ++-- benchmark/test/reference/preconditioner.profile.stderr | 4 ++-- benchmark/test/reference/preconditioner.simple.stderr | 4 ++-- benchmark/test/reference/solver.profile.stderr | 4 ++-- benchmark/test/reference/solver.simple.stderr | 4 ++-- benchmark/test/reference/sparse_blas.profile.stderr | 4 ++-- benchmark/test/reference/sparse_blas.simple.stderr | 4 ++-- benchmark/test/reference/spmv.profile.stderr | 4 ++-- benchmark/test/reference/spmv.simple.stderr | 4 ++-- 16 files changed, 32 insertions(+), 32 deletions(-) diff --git 
a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index 16a86bd4c94..b697ad41392 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/blas.simple.stderr b/benchmark/test/reference/blas.simple.stderr index 72a2fbb9b90..02b6f94ba28 100644 --- a/benchmark/test/reference/blas.simple.stderr +++ b/benchmark/test/reference/blas.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr index d6aab6a0331..9ab8a899649 100644 --- a/benchmark/test/reference/conversion.all.stderr +++ b/benchmark/test/reference/conversion.all.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index e772752ea4a..6733472be8f 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/conversion.simple.stderr b/benchmark/test/reference/conversion.simple.stderr index f044da61804..d221ead12a4 100644 --- a/benchmark/test/reference/conversion.simple.stderr +++ b/benchmark/test/reference/conversion.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index 718240f5a38..efd79f66dc5 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/distributed_solver.simple.stderr b/benchmark/test/reference/distributed_solver.simple.stderr index 6a5dab5d844..9feb7fa9522 100644 --- a/benchmark/test/reference/distributed_solver.simple.stderr +++ 
b/benchmark/test/reference/distributed_solver.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr b/benchmark/test/reference/matrix_statistics.simple.stderr index 69d2bbf9098..6b853c3f4ea 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stderr +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running test case { "size": 100, diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index bd8628be212..5b47bc9bd94 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/preconditioner.simple.stderr b/benchmark/test/reference/preconditioner.simple.stderr index bfec4a697ee..d480d4fedbd 100644 --- a/benchmark/test/reference/preconditioner.simple.stderr +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 3d9b9a3ad10..65b7560d936 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/solver.simple.stderr b/benchmark/test/reference/solver.simple.stderr index 936046c4949..c5e4267a6bd 100644 --- a/benchmark/test/reference/solver.simple.stderr +++ b/benchmark/test/reference/solver.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index 66c67cf84ea..d05f5117b8e 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 0 warm 
iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/sparse_blas.simple.stderr b/benchmark/test/reference/sparse_blas.simple.stderr index e6e0884e267..bf5001f67b7 100644 --- a/benchmark/test/reference/sparse_blas.simple.stderr +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 5a12a077bc5..961ac587990 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 diff --git a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr index 1bb4472bce6..dc9933b40ec 100644 --- a/benchmark/test/reference/spmv.simple.stderr +++ b/benchmark/test/reference/spmv.simple.stderr @@ -1,5 +1,5 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 From ccabb4d052a966b7f073e24b1c0dba024865d851 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 21 Jun 2023 14:16:06 +0200 Subject: [PATCH 044/583] review updates - add missing newline - remove disable test outputs - fix docstrings - fix duplicate matrix_statistics test Co-authored-by: Yuhsiang M. 
Tsai --- .gitlab-ci.yml | 2 +- benchmark/blas/blas.cpp | 3 +- benchmark/test/matrix_statistics.py | 2 +- benchmark/test/reference/blas.profile.stderr | 3 +- benchmark/test/reference/blas.simple.stderr | 3 +- .../multi_vector_distributed.profile.stderr | 808 ------ .../multi_vector_distributed.profile.stdout | 29 - .../multi_vector_distributed.simple.stderr | 76 - .../multi_vector_distributed.simple.stdout | 29 - .../reference/spmv_distributed.profile.stderr | 2380 ----------------- .../reference/spmv_distributed.profile.stdout | 21 - .../reference/spmv_distributed.simple.stderr | 34 - .../reference/spmv_distributed.simple.stdout | 21 - benchmark/test/test_framework.py.in | 13 +- 14 files changed, 17 insertions(+), 3407 deletions(-) delete mode 100644 benchmark/test/reference/multi_vector_distributed.profile.stderr delete mode 100644 benchmark/test/reference/multi_vector_distributed.profile.stdout delete mode 100644 benchmark/test/reference/multi_vector_distributed.simple.stderr delete mode 100644 benchmark/test/reference/multi_vector_distributed.simple.stdout delete mode 100644 benchmark/test/reference/spmv_distributed.profile.stderr delete mode 100644 benchmark/test/reference/spmv_distributed.profile.stdout delete mode 100644 benchmark/test/reference/spmv_distributed.simple.stderr delete mode 100644 benchmark/test/reference/spmv_distributed.simple.stdout diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d899ff00ad0..eafeae20729 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -615,7 +615,7 @@ build/dpcpp/2022-1/cpu/release/static: BUILD_DPCPP: "ON" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "ON" - ONEAPI_DEVICE_SELECTOR: "*:cpu" + SYCL_DEVICE_TYPE: "CPU" SLURM_PARTITION: "cpu" SLURM_TIME: "2:00:00" # This job is not in exclusive mode diff --git a/benchmark/blas/blas.cpp b/benchmark/blas/blas.cpp index ee2dc06d01b..11228ed5818 100644 --- a/benchmark/blas/blas.cpp +++ b/benchmark/blas/blas.cpp @@ -133,7 +133,8 @@ Parameters for a benchmark case are: std::string format = example_config; initialize_argument_parsing(&argc, &argv, header, format); - std::string extra_information = "The operations are " + FLAGS_operations; + std::string extra_information = + "The operations are " + FLAGS_operations + "\n"; print_general_information(extra_information); auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); diff --git a/benchmark/test/matrix_statistics.py b/benchmark/test/matrix_statistics.py index 365cfe025dd..a29c80a0a7a 100755 --- a/benchmark/test/matrix_statistics.py +++ b/benchmark/test/matrix_statistics.py @@ -19,7 +19,7 @@ # input file test_framework.compare_output( - ["-input", '[{"size": 100, "stencil": "7pt"}]'], + ["-input", str(test_framework.sourcepath / "input.mtx.json")], expected_stdout="matrix_statistics.simple.stdout", expected_stderr="matrix_statistics.simple.stderr", ) diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index b697ad41392..abc496b0921 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -3,7 +3,8 @@ This is Ginkgo 1.7.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 -The operations are copy,axpy,scalRunning test case +The operations are copy,axpy,scal +Running test case { "n": 100, "blas": {} diff --git a/benchmark/test/reference/blas.simple.stderr b/benchmark/test/reference/blas.simple.stderr index 02b6f94ba28..9508b0dcf1e 100644 --- 
a/benchmark/test/reference/blas.simple.stderr +++ b/benchmark/test/reference/blas.simple.stderr @@ -3,7 +3,8 @@ This is Ginkgo 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 -The operations are copy,axpy,scalRunning test case +The operations are copy,axpy,scal +Running test case { "n": 100, "blas": {} diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr deleted file mode 100644 index 3cf18472311..00000000000 --- a/benchmark/test/reference/multi_vector_distributed.profile.stderr +++ /dev/null @@ -1,808 +0,0 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) -Running on reference(0) -Running with 0 warm iterations and 1 running iterations -The random seed for right hand sides is 42 -The operations are copy,axpy,scalRunning test case -{ - "n": 100, - "blasDEBUG: begin n = 100 -DEBUG: begin copy -": {} -} -DEBUG: begin n = 100 -DEBUG: begin copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_sizeDEBUG: begin n = 100 -DEBUG: begin copy -DEBUG: begin allocate -DEBUG: end allocate - -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: begin allocate -DEBUG: end allocateDEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array - -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copyDEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copyDEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: 
begin copy -DEBUG: end copy -DEBUG: end copy -DEBUG: begin free - -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate - -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end DEBUG: begin free -DEBUG: end freefree -DEBUG: end free -DEBUG: begin free -DEBUG: end free -free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -allocate -DEBUG: begin allocate -DEBUG: end allocateDEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate - -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_arrayDEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin 
copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin freecopy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end free - -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: begin dense::fill -DEBUG: end dense::fill -free -DEBUG: end free -DEBUG: begin dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: begin free -DEBUG: end DEBUG: begin free -DEBUG: end free -free -DEBUG: begin free -DEBUG: end DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end copy -Current state: -[ - free -DEBUG: end copy -DEBUG: begin axpy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: end copy -DEBUG: begin axpy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate { - "n": DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -allocate -DEBUG: begin allocate -DEBUG: end DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -100, - "blas": { - allocateDEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin "copy": { - -DEBUG: begin allocate -components::fill_array -DEBUG: end components::fill_array -DEBUG: begin DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy - "time": 0.0000components::fill_array -DEBUG: end components::fill_arrayDEBUG: end copy -DEBUG: begin free -08, - -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free - "flops": 12DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin 500000.0, - partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin freefree -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size - -DEBUG: end free -DEBUG: begin copy -DEBUG: end DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin 
allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin "bandwidth": 200000000.0, - copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array "repetitiDEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free - -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguousons": 1, - DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size - -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate - "completed": tDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -rue - DEBUG: end allocate -DEBUG: begin allocateDEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin } - -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_arrayfree -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin } - } -] -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin -DEBUG: begin axpy -DEBUG: begin allocatecomponents::fill_array -DEBUG: end components::fill_array -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill - -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copypartition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end -DEBUG: end copyDEBUG: begin dense::add_scaled -DEBUG: end dense::add_scaled -allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin -DEBUG: begin allocate -DEBUG: end components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_arrayallocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -free -DEBUG: end DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -partition::build_starting_indices -DEBUG: begin copy -DEBUG: end DEBUG: begin dense::fill -DEBUG: end dense::fill -copy -DEBUG: begin free -DEBUG: end DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: 
begin dense::fill -DEBUG: end dense::fill -free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin DEBUG: begin dense::add_scaled -DEBUG: end dense::add_scaled -free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::add_scaled -DEBUG: end dense::add_scaled -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin DEBUG: begin free -DEBUG: end freeDEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free - -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end free -DEBUG: end axpy -DEBUG: begin scal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin DEBUG: end free -DEBUG: end axpy -DEBUG: begin scal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end free -DEBUG: end axpy -allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_sizeallocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_sizeCurrent state: -[ - { - -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end "n"allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocateallocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array: 10 -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -0, - components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array 
-DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy "blas": { - DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: begin free -DEBUG: end free - "copy": { -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin freefree -DEBUG: begin free - -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end - DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::scale -DEBUG: end dense::scalefree -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill - "ti -DEBUG: begin dense::scale -DEBUG: end dense::scale -me": 0.000008, - "flops": 12500000.0, - "bandwidth": 200000000.0, - "repetitions": 1, - "completed": true - }, - "axpy": { - "time": 0.00002, - "flops": 10000000.0, - "bandwidth": 119999999.99999999, - "repetitions": 1, - "completed": true - } - } - } -] -DEBUG: begin scal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::scale -DEBUG: end dense::scale -DEBUG: begin free -DEBUG: end freeDEBUG: begin free -DEBUG: end free -DEBUG: begin freeDEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end scal -DEBUG: end n = 100 - -DEBUG: begin free -DEBUG: end free -DEBUG: end scal -Current state: -[ - { - "n": 100, - -DEBUG: end free -DEBUG: end scal -DEBUG: end n = 100 - "blas": { - "copy": { - "time": 0.000008, - "flops": 12500000.0, - "bandwidth": 200000000.0, - "repetitions": 1, - "completed": true - }, - "axpy": { - "time": 0.00002, - "flops": 10000000.0, - "bandwidth": 119999999.99999999, - "repetitions": 1, - "completed": true - }, - "scal": { - "time": 0.000006, - "flops": 16666666.666666666, - "bandwidth": 266666666.66666666, - "repetitions": 1, - "completed": true - } - } - } -] -DEBUG: end n = 100 diff --git 
a/benchmark/test/reference/multi_vector_distributed.profile.stdout b/benchmark/test/reference/multi_vector_distributed.profile.stdout deleted file mode 100644 index 3a2e7e54f80..00000000000 --- a/benchmark/test/reference/multi_vector_distributed.profile.stdout +++ /dev/null @@ -1,29 +0,0 @@ - -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "scal": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stderr b/benchmark/test/reference/multi_vector_distributed.simple.stderr deleted file mode 100644 index 72a2fbb9b90..00000000000 --- a/benchmark/test/reference/multi_vector_distributed.simple.stderr +++ /dev/null @@ -1,76 +0,0 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) -Running on reference(0) -Running with 2 warm iterations and 10 running iterations -The random seed for right hand sides is 42 -The operations are copy,axpy,scalRunning test case -{ - "n": 100, - "blas": {} -} -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "scal": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stdout b/benchmark/test/reference/multi_vector_distributed.simple.stdout deleted file mode 100644 index 08e692727fe..00000000000 --- a/benchmark/test/reference/multi_vector_distributed.simple.stdout +++ /dev/null @@ -1,29 +0,0 @@ - -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "scal": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr deleted file mode 100644 index b190ac8a458..00000000000 --- a/benchmark/test/reference/spmv_distributed.profile.stderr +++ /dev/null @@ -1,2380 +0,0 @@ -This is Ginkgo 1.6.0 (develop) - running with core module 1.6.0 (develop) -Running on reference(0) -Running with 0 warm iterations and 1 running iterations -The random seed for right hand sides is 42 -The formats are [csr]x[csr] -The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "comm_pattern": "stencil", - "spmv": {} -} -DEBUG: begin stencil(100,7pt,stencil)DEBUG: begin stencil(100,7pt,stencil) - -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin 
partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_sizeDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin stencil(100,7pt,stencil) - -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocateDEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array - -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indicesDEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array - -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin DEBUG: begin allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocatepartition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices - -DEBUG: begin allocate -DEBUG: end DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -allocate -DEBUG: begin components::aos_to_soa -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end freeDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin DEBUG: end components::aos_to_soa - -DEBUG: begin copy -DEBUG: end copycomponents::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: end -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end components::aos_to_soa -dense::fill -DEBUG: begin dense::fill_in_matrix_data -DEBUG: end dense::fill_in_matrix_data -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: begin dense::fill -DEBUG: begin free 
-DEBUG: end free -DEBUG: begin free -DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end freeDEBUG: end allocate -DEBUG: end dense::fill -DEBUG: begin dense::fill_in_matrix_data - -DEBUG: begin free -DEBUG: end free -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin dense::fill_in_matrix_data -DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end dense::fill_in_matrix_data -DEBUG: end free -DEBUG: begin free -DEBUG: begin free -DEBUG: end free -DEBUG: begin freeDEBUG: begin free -DEBUG: end free -DEBUG: end free -DEBUG: begin free -DEBUG: end free - -DEBUG: end free -DEBUG: begin free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end DEBUG: end free -free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end allocate -DEBUG: end allocate -partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocateDEBUG: begin DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin -DEBUG: end allocate -DEBUG: begin allocatefree -DEBUG: end free -allocate -DEBUG: end allocate -DEBUG: begin allocate - -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocateDEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_arrayDEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end partition::build_ranges_from_global_size - -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocateDEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices - -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin 
components::aos_to_soa -DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: end dense::fillDEBUG: end dense::fill -DEBUG: begin dense::fill_in_matrix_data -DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin -DEBUG: begin dense::fill_in_matrix_data -DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -allocate -DEBUG: end allocate -DEBUG: begin DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end freeallocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: end dense::fill -free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -free -Matrix is of size (81, 81) -DEBUG: begin dense::fill_in_matrix_data -DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin DEBUG: begin copy() -DEBUG: begin copy() -free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end freeDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate - -DEBUG: begin free -DEBUG: end free -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() -DEBUG: begin allocateDEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: begin free -DEBUG: end free - -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_sizeDEBUG: begin copy() -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate - -DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copyDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end dense::copy -allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_arrayDEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin partition::build_ranges_from_global_size -components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array 
-DEBUG: begin components::fill_array - -DEBUG: end components::fill_array -DEBUG: begin components::fill_arrayDEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocateDEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array - -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocatepartition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices - -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin free -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin free -DEBUG: begin copy() -DEBUG: begin copycomponents::fill_array -DEBUG: end components::fill_array -DEBUG: begin partition::build_from_contiguous -DEBUG: end -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: end free -partition::build_from_contiguous -DEBUG: begin partition::build_starting_indices -DEBUG: end DEBUG: begin copy -DEBUG: end copy -partition::build_starting_indices -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin freeDEBUG: end copy() -DEBUG: begin copy()DEBUG: begin copy() -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: end free - -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy - -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy() -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: end copy()DEBUG: end copy()DEBUG: begin copy -DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate - -DEBUG: begin copy() -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin allocate -DEBUG: end -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy() -DEBUG: begin copy() -allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: begin copy -DEBUG: end copyDEBUG: end allocate -DEBUG: begin components::aos_to_soaDEBUG: end copy -DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::fill -DEBUG: end dense::fill -DEBUG: begin copy -DEBUG: end copy -DEBUG: begin copy -DEBUG: end copy - -DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate - -DEBUG: begin distributed_matrix::build_local_nonlocal -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocateDEBUG: begin dense::fill -DEBUG: end dense::fill - -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocateDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end DEBUG: end components::aos_to_soa -allocate -DEBUG: begin free -DEBUG: begin allocate -DEBUG: end allocate - 
-DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocateDEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin distributed_matrix::build_local_nonlocal - -DEBUG: begin components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -allocate -DEBUG: begin free -DEBUG: begin distributed_matrix::build_local_nonlocal -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin freefree -DEBUG: begin allocate -DEBUG: end free -DEBUG: begin allocateDEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate - -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocateDEBUG: end allocate -DEBUG: begin free -DEBUG: end allocate -DEBUG: begin free -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free - -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end -DEBUG: end allocate -DEBUG: begin freeDEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: end free - -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: begin allocate -DEBUG: end allocateDEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate - -DEBUG: begin free -DEBUG: end free -DEBUG: begin DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin freeallocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: begin free -DEBUG: end DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free - -DEBUG: end allocate -DEBUG: begin freefree -DEBUG: begin allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free - -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end allocate -DEBUG: begin free -DEBUG: end freeDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate - -DEBUG: begin allocate -DEBUG: end allocateDEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin freeDEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate - -DEBUG: end free -DEBUG: begin 
[... several hundred deleted lines of interleaved "DEBUG: begin/end allocate/free/copy/apply()" profiler-hook output are elided here: the three MPI ranks of the distributed SpMV profile run write to the same stream concurrently, so the deleted reference output cannot be reconstructed line by line ...]
-Current state:
-[
-    {
-        "size": 81,
-        "stencil": "7pt",
-        "comm_pattern": "stencil",
-        "spmv": {
-            "csr-csr": {
-                "storage": 6420,
-                "max_relative_norm2": 0.0,
-                "time": 0.000046,
-                "repetitions": 1,
-                "completed": true
-            }
-        },
-        "nnz": 144,
-        "optimal": {}
-    }
-]
-DEBUG: begin free
-DEBUG: end free
-DEBUG: begin free
-DEBUG: end free
-DEBUG: begin free
-DEBUG: end free
-DEBUG: end stencil(100,7pt,stencil)
diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout b/benchmark/test/reference/spmv_distributed.profile.stdout
deleted file mode 100644
index 5512866fdf0..00000000000
--- a/benchmark/test/reference/spmv_distributed.profile.stdout
+++ /dev/null
@@ -1,21 +0,0 @@
-
-[
-    {
-        "size": 81,
-        "stencil": "7pt",
-        "comm_pattern": "stencil",
-        "spmv": {
-            "csr-csr": {
-                "storage": 6420,
-                "max_relative_norm2": 1.0,
-                "time": 1.0,
-                "repetitions": 1,
-                "completed": true
-            }
-        },
-        "nnz": 144,
-        "optimal": {
-            "spmv": "csr-csr"
-        }
-    }
-]
diff --git a/benchmark/test/reference/spmv_distributed.simple.stderr b/benchmark/test/reference/spmv_distributed.simple.stderr
deleted file mode 100644
index 7fa9aeb581f..00000000000
--- a/benchmark/test/reference/spmv_distributed.simple.stderr
+++ /dev/null
@@ -1,34 +0,0 @@
-This is Ginkgo 1.6.0 (develop)
-    running with core module 1.6.0 (develop)
-Running on reference(0)
-Running with 2 warm iterations and 10 running iterations
-The random seed for right hand sides is 42
-The formats are [csr]x[csr]
-The number of right hand sides is 1
-Running test case
-{
-    "size": 100,
-    "stencil": "7pt",
-    "comm_pattern": "stencil",
-    "spmv": {}
-}
-Matrix is of size (81, 81)
-Current state:
-[
-    {
-        "size": 81,
-        "stencil": "7pt",
-        "comm_pattern": "stencil",
-        "spmv": {
-            "csr-csr": {
-                "storage": 6420,
-                "max_relative_norm2": 1.0,
-                "time": 1.0,
-                "repetitions": 10,
-                "completed": true
-            }
-        },
-        "nnz": 144,
-        "optimal": {}
-    }
-]
diff --git a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout
deleted file mode 100644
index 7b6e0883c14..00000000000
--- a/benchmark/test/reference/spmv_distributed.simple.stdout
+++ /dev/null
@@ -1,21 +0,0 @@
-
-[
-    {
-        "size": 81,
-        "stencil": "7pt",
-        "comm_pattern": "stencil",
-        "spmv": {
-            "csr-csr": {
-                "storage": 6420,
-                "max_relative_norm2": 1.0,
-                "time": 1.0,
-                "repetitions": 10,
-                "completed": true
-            }
-        },
-        "nnz": 144,
-        "optimal": {
-            "spmv": "csr-csr"
-        }
-    }
-]
diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in
index 09986fabdf1..16a30c35410 100644
--- a/benchmark/test/test_framework.py.in
+++ b/benchmark/test/test_framework.py.in
@@ -35,8 +35,8 @@ def sanitize_json_key_value(key: str, value, sanitize_all: bool):
 
     Strings with a key in empty_string_paths will be emptied
     Numbers with a key in denumberify_paths will be set to 1.0
-
     """
+
     if key in empty_string_paths and isinstance(value, str):
         return ""
     if key in denumberify_paths and isinstance(value, float):
@@ -48,11 +48,14 @@ def sanitize_json_key_value(key: str, value, sanitize_all: bool):
     return sanitize_json(value, sanitize_all)
 
 
-def sanitize_json(parsed_input, sanitize_all=False):
+def sanitize_json(parsed_input, sanitize_all: bool = False):
     """Removes non-deterministic parts of a parsed JSON input.
 
     If sanitize_all is set to True, all nested float values will be set to 0.
-    Otherwise, the sanitation"""
+    Otherwise, only JSON object entries will be sanitized
+    using sanitize_json_key_value.
+    """
+
     if isinstance(parsed_input, dict):
         return {
             key: sanitize_json_key_value(key, value, sanitize_all)
@@ -73,7 +76,8 @@ def sanitize_json_in_text(lines: list[str]) -> list[str]:
     (recognized by a single [, {, } or ] in an otherwise empty line).
     The JSON output will be parsed and sanitized through sanitize_json(...)
     and pretty-printed to replace the original JSON input.
-    The function returns the resulting output"""
+    The function returns the resulting output.
+    """
     json_begins = [i for i, l in enumerate(lines) if l in ["[", "{"]]
     json_ends = [i + 1 for i, l in enumerate(lines) if l in ["]", "}"]]
@@ -116,6 +120,7 @@ def determinize_text(
     if it can be parsed correctly.
     The output is guaranteed to end with an empty line.
     """
+
     lines = input.splitlines()
     output_lines = []
     patterns = [re.compile(pattern) for pattern in ignore_patterns]

From 1b648448905ff513b99e7bef8893f683e08247ea Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Wed, 19 Jul 2023 15:21:34 +0200
Subject: [PATCH 045/583] support older python versions

---
 benchmark/test/test_framework.py.in | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in
index 16a30c35410..912d2f0d203 100644
--- a/benchmark/test/test_framework.py.in
+++ b/benchmark/test/test_framework.py.in
@@ -2,7 +2,7 @@
 import subprocess
 import difflib
 import json
-import typing
+from typing import List, Tuple
 import re
 import pathlib
 import sys
@@ -69,7 +69,7 @@ def sanitize_json(parsed_input, sanitize_all: bool = False):
     return parsed_input
 
 
-def sanitize_json_in_text(lines: list[str]) -> list[str]:
+def sanitize_json_in_text(lines: List[str]) -> List[str]:
     """Sanitizes all occurrences of JSON content inside text input.
 
     Takes a list of text lines and detects any pretty-printed JSON output inside
@@ -99,7 +99,8 @@ def sanitize_json_in_text(lines: list[str]) -> list[str]:
         for begin, end, do_sanitize in combined_pairs
     ]
     reconstructed = [
-        json.dumps(sanitize_json(json.loads(t)), indent=4) if do_sanitize else t
+        json.dumps(sanitize_json(json.loads(t)),
+                   indent=4) if do_sanitize else t
         for t, do_sanitize in texts
     ]
     return "\n".join(reconstructed).split("\n")
@@ -107,9 +108,9 @@ def sanitize_json_in_text(lines: list[str]) -> list[str]:
 
 def determinize_text(
     input: str,
-    ignore_patterns: list[str],
-    replace_patterns: list[(str, str)],
-) -> list[str]:
+    ignore_patterns: List[str],
+    replace_patterns: List[Tuple[str, str]],
+) -> List[str]:
     """Sanitizes the given input string.
 
     Every input line matching an entry from ignore_patterns will be removed.
@@ -143,11 +144,11 @@ def determinize_text(
 
 
 def compare_output_impl(
-    args: list[str],
+    args: List[str],
     expected_stdout: str,
     expected_stderr: str,
     stdin: str,
-    launcher_flags: list[str],
+    launcher_flags: List[str],
 ):
     args = [sys.argv[1]] + args
     expected_stdout = str(sourcepath / "reference" / expected_stdout)
@@ -214,7 +215,8 @@ def compare_output_impl(
         print("FAIL: stdout differs")
         print(
             "\n".join(
-                difflib.unified_diff(expected_stdout_processed, result_stdout_processed)
+                difflib.unified_diff(
+                    expected_stdout_processed, result_stdout_processed)
             )
         )
         failed = True
@@ -222,7 +224,8 @@ def compare_output_impl(
         print("FAIL: stderr differs")
         print(
             "\n".join(
-                difflib.unified_diff(expected_stderr_processed, result_stderr_processed)
+                difflib.unified_diff(
+                    expected_stderr_processed, result_stderr_processed)
             )
         )
         failed = True
@@ -232,7 +235,7 @@ def compare_output_impl(
 
 
 def compare_output(
-    args: list[str], expected_stdout: str, expected_stderr: str, stdin: str = ""
+    args: List[str], expected_stdout: str, expected_stderr: str, stdin: str = ""
 ):
     compare_output_impl(
         args,

From 4fa0a5a6554f10b88cb277295443ffa99c782bbc Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Wed, 19 Jul 2023 15:21:43 +0200
Subject: [PATCH 046/583] fix typing error

---
 benchmark/test/test_framework.py.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in
index 912d2f0d203..6037f8c594e 100644
--- a/benchmark/test/test_framework.py.in
+++ b/benchmark/test/test_framework.py.in
@@ -129,8 +129,8 @@ def determinize_text(
         for pattern, replacement in replace_patterns:
             line = re.sub(pattern, replacement, line)
         keep = True
-        for pattern in patterns:
-            if re.match(pattern, line):
+        for compiled_pattern in patterns:
+            if re.match(compiled_pattern, line):
                 keep = False
                 break
         if keep:

From 2de207dbb10ea3d877485d111114545777eee020 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Wed, 19 Jul 2023 15:21:57 +0200
Subject: [PATCH 047/583] remove unused tests

---
 benchmark/test/CMakeLists.txt | 5 +--
 benchmark/test/input.distributed_mtx.json | 7 ----
 benchmark/test/multi_vector_distributed.py | 37 ----------------------
 benchmark/test/spmv_distributed.py | 35 --------------------
 4 files changed, 1 insertion(+), 83 deletions(-)
 delete mode 100644 benchmark/test/input.distributed_mtx.json
 delete mode 100644 benchmark/test/multi_vector_distributed.py
 delete mode 100644 benchmark/test/spmv_distributed.py

diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt
index 0a2be0e662c..e1aab6dd75d 100644
--- a/benchmark/test/CMakeLists.txt
+++ b/benchmark/test/CMakeLists.txt
@@ -22,8 +22,5 @@ add_benchmark_test(solver)
 add_benchmark_test(sparse_blas)
 add_benchmark_test(spmv)
 if (GINKGO_BUILD_MPI)
-    # the distributed tests are still failing due to unstable output
-    #add_benchmark_test(multi_vector_distributed)
-    #add_benchmark_test(spmv_distributed)
     add_benchmark_test(solver_distributed)
-endif()
\ No newline at end of file
+endif()
diff --git a/benchmark/test/input.distributed_mtx.json b/benchmark/test/input.distributed_mtx.json
deleted file mode 100644
index aca115179e6..00000000000
--- a/benchmark/test/input.distributed_mtx.json
+++ /dev/null
@@ -1,7 +0,0 @@
-[
-    {
-        "size": 100,
-        "stencil": "7pt",
-        "comm_pattern": "stencil"
-    }
-]
\ No newline at end of file
diff --git a/benchmark/test/multi_vector_distributed.py b/benchmark/test/multi_vector_distributed.py
deleted file mode 100644
index aab886ca509..00000000000
--- a/benchmark/test/multi_vector_distributed.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python3
-import test_framework
-
-# check that all input modes work:
-# parameter
-test_framework.compare_output_distributed(
-    ["-input", '[{"n": 100}]'],
-    expected_stdout="multi_vector_distributed.simple.stdout",
-    expected_stderr="multi_vector_distributed.simple.stderr",
-    num_procs=3,
-)
-
-# stdin
-test_framework.compare_output_distributed(
-    [],
-    expected_stdout="multi_vector_distributed.simple.stdout",
-    expected_stderr="multi_vector_distributed.simple.stderr",
-    stdin='[{"n": 100}]',
-    num_procs=3,
-)
-
-# file
-test_framework.compare_output_distributed(
-    ["-input", str(test_framework.sourcepath / "input.blas.json")],
-    expected_stdout="multi_vector_distributed.simple.stdout",
-    expected_stderr="multi_vector_distributed.simple.stderr",
-    stdin='[{"n": 100}]',
-    num_procs=3,
-)
-
-# profiler annotations
-# currently still unstable output and thus disabled
-# test_framework.compare_output_distributed(["-input", '[{"n": 100}]', '-profile', '-profiler_hook', 'debug'],
-#                                           expected_stdout="multi_vector_distributed.profile.stdout",
-#                                           expected_stderr="multi_vector_distributed.profile.stderr",
-#                                           stdin='[{"n": 100}]',
-#                                           num_procs=3)
diff --git a/benchmark/test/spmv_distributed.py b/benchmark/test/spmv_distributed.py
deleted file mode 100644
index 1b219b34cda..00000000000
--- a/benchmark/test/spmv_distributed.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env python3
-import test_framework
-
-# check that all input modes work:
-# parameter
-test_framework.compare_output_distributed(
-    ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]'],
-    expected_stdout="spmv_distributed.simple.stdout",
-    expected_stderr="spmv_distributed.simple.stderr",
-    num_procs=3,
-)
-
-# stdin
-test_framework.compare_output_distributed(
-    [],
-    expected_stdout="spmv_distributed.simple.stdout",
-    expected_stderr="spmv_distributed.simple.stderr",
-    num_procs=3,
-    stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]',
-)
-
-# input file
-test_framework.compare_output_distributed(
-    ["-input", str(test_framework.sourcepath / "input.distributed_mtx.json")],
-    expected_stdout="spmv_distributed.simple.stdout",
-    expected_stderr="spmv_distributed.simple.stderr",
-    num_procs=3,
-)
-
-# profiler annotations
-# currently still unstable output and thus disabled
-# test_framework.compare_output_distributed(["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', '-profile', '-profiler_hook', 'debug'],
-#                                           expected_stdout="spmv_distributed.profile.stdout",
-#                                           expected_stderr="spmv_distributed.profile.stderr",
-#                                           num_procs=3)

From 138f44c15404ac02a1dc6a73255f56a6855d526b Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Wed, 19 Jul 2023 15:48:29 +0200
Subject: [PATCH 048/583] fix device memory access segfault

---
 test/mpi/preconditioner/schwarz.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/mpi/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp
index 95bfe3f66b4..8586711a114 100644
--- a/test/mpi/preconditioner/schwarz.cpp
+++ b/test/mpi/preconditioner/schwarz.cpp
@@ -162,13 +162,14 @@ class SchwarzPreconditioner : public CommonMpiTestFixture {
         std::shared_ptr dist_vec,
         std::shared_ptr local_vec)
     {
+        auto host_row_part = row_part->clone(ref);
         auto l_dist_vec = dist_vec->get_local_vector();
         auto vec_view = local_vec_type::create_const(
             exec, l_dist_vec->get_size(),
             gko::array::const_view(
                 exec, l_dist_vec->get_size()[0],
                 local_vec->get_const_values() +
-                    row_part->get_range_bounds()[comm.rank()]),
+                    host_row_part->get_range_bounds()[comm.rank()]),
             l_dist_vec->get_size()[1]);
         GKO_ASSERT_MTX_NEAR(l_dist_vec, vec_view.get(), r::value);
     }

From 684df2076d97d298d74771bd1207cefac8351d6f Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Wed, 19 Jul 2023 19:38:32 +0200
Subject: [PATCH 049/583] remove deprecated SYCL environment variables

---
 .gitlab-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index eafeae20729..4ad66eca652 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -615,7 +615,7 @@ build/dpcpp/2022-1/cpu/release/static:
     BUILD_DPCPP: "ON"
     BUILD_TYPE: "Release"
     BUILD_SHARED_LIBS: "ON"
-    SYCL_DEVICE_TYPE: "CPU"
+    SYCL_DEVICE_FILTER: "*:cpu"
     SLURM_PARTITION: "cpu"
     SLURM_TIME: "2:00:00"
     # This job is not in exclusive mode

From 9b614a3c3e5d7945cfcf551aba493acd2151935e Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Thu, 30 Mar 2023 12:20:20 +0200
Subject: [PATCH 050/583] add allocator support to all executors

---
 benchmark/utils/general.hpp | 4 +-
 core/CMakeLists.txt | 1 +
 core/base/memory.cpp | 59 ++++
 core/device_hooks/cuda_hooks.cpp | 44 +++
 core/device_hooks/dpcpp_hooks.cpp | 18 ++
 core/device_hooks/hip_hooks.cpp | 19 ++
 core/test/base/executor.cpp | 79 +-----
 cuda/CMakeLists.txt | 4 +
 cuda/base/device.cpp | 65 +++++
 cuda/base/executor.cpp | 173 ++----------
 cuda/base/memory.cpp | 168 +++++++++++
 cuda/base/nvtx.cpp | 96 +++++++
 .../stream.cpp} | 53 ++--
 cuda/test/base/CMakeLists.txt | 2 +-
 cuda/test/base/cuda_executor.cu | 15 +-
 cuda/test/base/memory.cpp | 126 +++++++++
 cuda/test/utils.hpp | 4 +-
 devices/cuda/executor.cpp | 27 --
 devices/hip/executor.cpp | 34 ---
 devices/omp/executor.cpp | 7 +-
 dpcpp/base/executor.dp.cpp | 33 +++
 .../base/memory.dp.cpp | 47 ++--
 dpcpp/test/base/CMakeLists.txt | 1 +
 dpcpp/test/base/memory.dp.cpp | 98 +++++++
 .../adaptiveprecision-blockjacobi.cpp | 7 +-
 examples/cb-gmres/cb-gmres.cpp | 7 +-
 examples/custom-logger/custom-logger.cpp | 7 +-
 .../custom-matrix-format.cpp | 7 +-
 .../custom-stopping-criterion.cpp | 7 +-
 .../ilu-preconditioned-solver.cpp | 7 +-
 .../inverse-iteration/inverse-iteration.cpp | 7 +-
 .../ir-ilu-preconditioned-solver.cpp | 7 +-
 .../iterative-refinement.cpp | 7 +-
 .../minimal-cuda-solver.cpp | 2 +-
 .../mixed-multigrid-preconditioned-solver.cpp | 7 +-
 .../mixed-multigrid-solver.cpp | 7 +-
 .../mixed-precision-ir/mixed-precision-ir.cpp | 7 +-
 examples/mixed-spmv/mixed-spmv.cpp | 7 +-
 ...igrid-preconditioned-solver-customized.cpp | 7 +-
 .../multigrid-preconditioned-solver.cpp | 7 +-
 .../nine-pt-stencil-solver.cpp | 7 +-
 examples/papi-logging/papi-logging.cpp | 7 +-
 .../performance-debugging.cpp | 7 +-
 examples/poisson-solver/poisson-solver.cpp | 7 +-
 .../preconditioned-solver.cpp | 7 +-
 .../simple-solver-logging.cpp | 7 +-
 examples/simple-solver/simple-solver.cpp | 7 +-
 .../three-pt-stencil-solver.cpp | 7 +-
 hip/CMakeLists.txt | 4 +
 hip/base/device.hip.cpp | 67 +++++
 hip/base/executor.hip.cpp | 137 +--------
 hip/base/memory.hip.cpp | 97 +++++++
 hip/base/roctx.hip.cpp | 70 +++++
 hip/base/stream.hip.cpp | 78 +++++
 hip/test/base/CMakeLists.txt | 1 -
 hip/test/base/hip_executor.hip.cpp | 14 +-
 hip/test/utils.hip.hpp | 4 +-
 include/ginkgo/core/base/executor.hpp | 266 +++++-------------
 include/ginkgo/core/base/fwd_defs.hpp | 90 ++++++
 include/ginkgo/core/base/memory.hpp | 211 ++++++++++++++
 include/ginkgo/core/base/stream.hpp | 124 ++++++++
 include/ginkgo/ginkgo.hpp | 3 +
 test/utils/executor.hpp | 19 +-
 test/utils/mpi/executor.hpp | 5 +-
 64 files changed, 1755 insertions(+), 775 deletions(-)
 create mode 100644 core/base/memory.cpp
 create mode 100644 cuda/base/device.cpp
 create mode 100644 cuda/base/memory.cpp
 create mode 100644 cuda/base/nvtx.cpp
 rename cuda/{test/base/cuda_executor_reset.cpp => base/stream.cpp} (62%)
 create mode 100644 cuda/test/base/memory.cpp
 rename hip/test/base/hip_executor_reset.cpp => dpcpp/base/memory.dp.cpp (63%)
 create mode 100644 dpcpp/test/base/memory.dp.cpp
 create mode 100644 hip/base/device.hip.cpp
 create mode 100644 hip/base/memory.hip.cpp
 create mode 100644 hip/base/roctx.hip.cpp
 create mode 100644 hip/base/stream.hip.cpp
 create mode 100644 include/ginkgo/core/base/fwd_defs.hpp
 create mode 100644 include/ginkgo/core/base/memory.hpp
 create mode 100644 include/ginkgo/core/base/stream.hpp

diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp
index 92c3e5c9b13..35077f66d4b 100644
--- a/benchmark/utils/general.hpp
+++ b/benchmark/utils/general.hpp
@@ -337,12 +337,12 @@ const std::map(bool)>>
     {"cuda",
      [](bool) {
          return gko::CudaExecutor::create(FLAGS_device_id,
-                                          gko::OmpExecutor::create(), true);
+                                          gko::OmpExecutor::create());
      }},
     {"hip",
      [](bool) {
          return gko::HipExecutor::create(FLAGS_device_id,
-                                         gko::OmpExecutor::create(), true);
+                                         gko::OmpExecutor::create());
      }},
     {"dpcpp", [](bool use_gpu_timer) {
         auto property = dpcpp_queue_property::in_order;
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 2f9643115c9..49cf89b66d6 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -10,6 +10,7 @@ target_sources(ginkgo
     base/device_matrix_data.cpp
     base/executor.cpp
     base/index_set.cpp
+    base/memory.cpp
     base/mpi.cpp
     base/mtx_io.cpp
     base/perturbation.cpp
diff --git a/core/base/memory.cpp b/core/base/memory.cpp
new file mode 100644
index 00000000000..88d97bcc765
--- /dev/null
+++ b/core/base/memory.cpp
@@ -0,0 +1,59 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include
+
+
+#include
+
+
+#include
+
+
+namespace gko {
+
+
+void* CpuAllocator::allocate(size_type num_bytes) const
+{
+    auto ptr = ::operator new (num_bytes, std::nothrow_t{});
+    GKO_ENSURE_ALLOCATED(ptr, "cpu", num_bytes);
+    return ptr;
+}
+
+
+void CpuAllocator::deallocate(void* ptr) const
+{
+    ::operator delete (ptr, std::nothrow_t{});
+}
+
+
+}  // namespace gko
\ No newline at end of file
diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp
index dd4c3f19f7c..cdecf735a9d 100644
--- a/core/device_hooks/cuda_hooks.cpp
+++ b/core/device_hooks/cuda_hooks.cpp
@@ -35,6 +35,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -52,6 +54,45 @@ version version_info::get_cuda_version() noexcept
 }
 
 
+void* CudaAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(cuda);
+
+
+void CudaAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+CudaAsyncAllocator::CudaAsyncAllocator(CUstream_st* stream)
+    GKO_NOT_COMPILED(cuda);
+
+
+void* CudaAsyncAllocator::allocate(size_type num_bytes) const
+    GKO_NOT_COMPILED(cuda);
+
+
+void CudaAsyncAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags)
+    GKO_NOT_COMPILED(cuda);
+
+
+void* CudaUnifiedAllocator::allocate(size_type num_bytes) const
+    GKO_NOT_COMPILED(cuda);
+
+
+void CudaUnifiedAllocator::deallocate(void* dev_ptr) const
+    GKO_NOT_COMPILED(cuda);
+
+
+CudaHostAllocator::CudaHostAllocator(int device_id) GKO_NOT_COMPILED(cuda);
+
+
+void* CudaHostAllocator::allocate(size_type num_bytes) const
+    GKO_NOT_COMPILED(cuda);
+
+
+void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+
 std::shared_ptr CudaExecutor::create(
     int device_id, std::shared_ptr master, bool device_reset,
     allocation_mode alloc_mode, CUstream_st* stream)
@@ -154,6 +195,9 @@ scoped_device_id_guard::scoped_device_id_guard(const CudaExecutor* exec,
     GKO_NOT_COMPILED(cuda);
 
 
+cuda_stream::cuda_stream() GKO_NOT_COMPILED(cuda);
+
+
 cuda_stream::cuda_stream(int device_id) GKO_NOT_COMPILED(cuda);
 
 
diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp
index a08f1f608fb..0ee3e6f289f 100644
--- a/core/device_hooks/dpcpp_hooks.cpp
+++ b/core/device_hooks/dpcpp_hooks.cpp
@@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include
 #include
+#include
 #include
 #include
 #include
@@ -52,6 +53,23 @@ version version_info::get_dpcpp_version() noexcept
 }
 
 
+void* DpcppAllocator::allocate_impl(sycl::queue* queue, size_type size) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
+void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
+void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue,
+                                           size_type size) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
+void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
 std::shared_ptr DpcppExecutor::create(
     int device_id, std::shared_ptr master,
     std::string device_type, dpcpp_queue_property property)
diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp
index 50637f7b3f0..739dac39f08 100644
--- a/core/device_hooks/hip_hooks.cpp
+++ b/core/device_hooks/hip_hooks.cpp
@@ -53,6 +53,22 @@ version version_info::get_hip_version() noexcept
 }
 
 
+void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream)
+    GKO_NOT_COMPILED(hip);
+
+
+void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
 std::shared_ptr HipExecutor::create(
     int device_id, std::shared_ptr master, bool device_reset,
     allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream)
@@ -155,6 +171,9 @@ scoped_device_id_guard::scoped_device_id_guard(const HipExecutor* exec,
     GKO_NOT_COMPILED(hip);
 
 
+hip_stream::hip_stream() GKO_NOT_COMPILED(hip);
+
+
 hip_stream::hip_stream(int device_id) GKO_NOT_COMPILED(hip);
 
 
diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp
index 0d64dfcf3cf..71064cf01d2 100644
--- a/core/test/base/executor.cpp
+++ b/core/test/base/executor.cpp
@@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include
 #include
+#include "ginkgo/core/base/memory.hpp"
 
 
 #if defined(__unix__) || defined(__APPLE__)
@@ -263,35 +264,6 @@ TEST(CudaExecutor, KnowsItsDeviceId)
 }
 
 
-TEST(CudaExecutor, CanGetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto cuda = gko::CudaExecutor::create(0, omp);
-
-    ASSERT_EQ(false, cuda->get_device_reset());
-}
-
-
-TEST(CudaExecutor, CanSetDefaultDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto cuda = gko::CudaExecutor::create(0, omp, true);
-
-    ASSERT_EQ(true, cuda->get_device_reset());
-}
-
-
-TEST(CudaExecutor, CanSetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto cuda = gko::CudaExecutor::create(0, omp);
-
-    cuda->set_device_reset(true);
-
-    ASSERT_EQ(true, cuda->get_device_reset());
-}
-
-
 TEST(HipExecutor, KnowsItsMaster)
 {
     auto omp = gko::OmpExecutor::create();
@@ -310,35 +282,6 @@ TEST(HipExecutor, KnowsItsDeviceId)
 }
 
 
-TEST(HipExecutor, CanGetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto hip = gko::HipExecutor::create(0, omp);
-
-    ASSERT_EQ(false, hip->get_device_reset());
-}
-
-
-TEST(HipExecutor, CanSetDefaultDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto hip = gko::HipExecutor::create(0, omp, true);
-
-    ASSERT_EQ(true, hip->get_device_reset());
-}
-
-
-TEST(HipExecutor, CanSetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto hip = gko::HipExecutor::create(0, omp);
-
-    hip->set_device_reset(true);
-
-    ASSERT_EQ(true, hip->get_device_reset());
-}
-
-
 TEST(DpcppExecutor, KnowsItsMaster)
 {
     auto omp = gko::OmpExecutor::create();
@@ -442,20 +385,11 @@ TEST(Executor, CanVerifyMemory)
 }
 
 
-template
-struct mock_free : T {
-    /**
-     * @internal Due to a bug with gcc 5.3, the constructor needs to be called
-     * with `()` operator instead of `{}`.
-     */
-    template
-    mock_free(Params&&... params) : T(std::forward(params)...)
-    {}
-
-    void raw_free(void* ptr) const noexcept override
+struct MockAllocator : gko::CpuAllocator {
+    void deallocate(void* ptr) const noexcept override
     {
         called_free = true;
-        T::raw_free(ptr);
+        CpuAllocator::deallocate(ptr);
     }
 
     mutable bool called_free{false};
@@ -464,12 +398,13 @@ struct mock_free : T {
 
 TEST(ExecutorDeleter, DeletesObject)
 {
-    auto ref = std::make_shared>();
+    auto alloc = std::make_shared();
+    auto ref = gko::ReferenceExecutor::create(alloc);
     auto x = ref->alloc(5);
 
     gko::executor_deleter{ref}(x);
 
-    ASSERT_TRUE(ref->called_free);
+    ASSERT_TRUE(alloc->called_free);
 }
 
 
diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt
index bbe7a953dbd..aecf4e1c2f2 100644
--- a/cuda/CMakeLists.txt
+++ b/cuda/CMakeLists.txt
@@ -1,11 +1,15 @@
 add_library(ginkgo_cuda $ "")
 target_sources(ginkgo_cuda
     PRIVATE
+    base/device.cpp
     base/device_matrix_data_kernels.cu
     base/exception.cpp
     base/executor.cpp
     base/index_set_kernels.cpp
+    base/memory.cpp
+    base/nvtx.cpp
     base/scoped_device_id.cpp
+    base/stream.cpp
     base/timer.cpp
     base/version.cpp
     components/prefix_sum_kernels.cu
diff --git a/cuda/base/device.cpp b/cuda/base/device.cpp
new file mode 100644
index 00000000000..31ab5bcde63
--- /dev/null
+++ b/cuda/base/device.cpp
@@ -0,0 +1,65 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include "cuda/base/device.hpp"
+
+
+#include
+
+
+#include
+
+
+#include "cuda/base/scoped_device_id.hpp"
+
+
+namespace gko {
+namespace kernels {
+namespace cuda {
+
+
+void reset_device(int device_id)
+{
+    gko::detail::cuda_scoped_device_id_guard guard{device_id};
+    cudaDeviceReset();
+}
+
+
+void destroy_event(CUevent_st* event)
+{
+    GKO_ASSERT_NO_CUDA_ERRORS(cudaEventDestroy(event));
+}
+
+
+}  // namespace cuda
+}  // namespace kernels
+}  // namespace gko
diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp
index e474d9c9f49..f6e838dd2dd 100644
--- a/cuda/base/executor.cpp
+++ b/cuda/base/executor.cpp
@@ -39,18 +39,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include
 
 
-#ifdef GKO_LEGACY_NVTX
-#include
-#else
-#include
-#endif
 #include
 #include
 #include
 #include
 
-#include
+#include
 
 
 #include "cuda/base/config.hpp"
@@ -65,25 +60,38 @@ namespace gko {
 #include "common/cuda_hip/base/executor.hpp.inc"
 
 
+std::unique_ptr allocator_from_mode(int device_id,
+                                    allocation_mode mode)
+{
+    switch (mode) {
+    case allocation_mode::device:
+        return std::make_unique();
+    case allocation_mode::unified_global:
+        return std::make_unique(device_id);
+    case allocation_mode::unified_host:
+        return std::make_unique(device_id);
+    default:
+        GKO_NOT_SUPPORTED(mode);
+    }
+}
+
+
 std::shared_ptr CudaExecutor::create(
     int device_id, std::shared_ptr master, bool device_reset,
     allocation_mode alloc_mode, cudaStream_t stream)
 {
     return std::shared_ptr(
-        new CudaExecutor(device_id, std::move(master), device_reset, alloc_mode,
-                         stream),
-        [device_id](CudaExecutor* exec) {
-            auto device_reset = exec->get_device_reset();
-            std::lock_guard guard(
-                nvidia_device::get_mutex(device_id));
-            delete exec;
-            auto& num_execs = nvidia_device::get_num_execs(device_id);
-            num_execs--;
-            if (!num_execs && device_reset) {
-                detail::cuda_scoped_device_id_guard g(device_id);
-                cudaDeviceReset();
-            }
-        });
+        new CudaExecutor(device_id, std::move(master),
+                         allocator_from_mode(device_id, alloc_mode), stream));
+}
+
+
+std::shared_ptr CudaExecutor::create(
+    int device_id, std::shared_ptr master,
+    std::shared_ptr alloc, cudaStream_t stream)
+{
+    return std::shared_ptr(new CudaExecutor(
+        device_id, std::move(master), std::move(alloc), stream));
 }
 
 
@@ -123,41 +131,14 @@ void OmpExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes,
 void CudaExecutor::raw_free(void* ptr) const noexcept
 {
     detail::cuda_scoped_device_id_guard g(this->get_device_id());
-    auto error_code = cudaFree(ptr);
-    if (error_code != cudaSuccess) {
-#if GKO_VERBOSE_LEVEL >= 1
-        // Unfortunately, if memory free fails, there's not much we can do
-        std::cerr << "Unrecoverable CUDA error on device "
-                  << this->get_device_id() << " in " << __func__ << ": "
-                  << cudaGetErrorName(error_code) << ": "
-                  << cudaGetErrorString(error_code) << std::endl
-                  << "Exiting program" << std::endl;
-#endif  // GKO_VERBOSE_LEVEL >= 1
-        std::exit(error_code);
-    }
+    alloc_->deallocate(ptr);
 }
 
 
 void* CudaExecutor::raw_alloc(size_type num_bytes) const
 {
-    void* dev_ptr = nullptr;
     detail::cuda_scoped_device_id_guard g(this->get_device_id());
-    int error_code = 0;
-    if (this->alloc_mode_ == allocation_mode::unified_host) {
-        error_code = cudaMallocManaged(&dev_ptr, num_bytes, cudaMemAttachHost);
-    } else if (this->alloc_mode_ == allocation_mode::unified_global) {
-        error_code =
-            cudaMallocManaged(&dev_ptr, num_bytes, cudaMemAttachGlobal);
-    } else if (this->alloc_mode_ == allocation_mode::device) {
-        error_code = cudaMalloc(&dev_ptr, num_bytes);
-    } else {
-        GKO_NOT_SUPPORTED(this->alloc_mode_);
-    }
-    if (error_code != cudaErrorMemoryAllocation) {
-        GKO_ASSERT_NO_CUDA_ERRORS(error_code);
-    }
-    GKO_ENSURE_ALLOCATED(dev_ptr, "cuda", num_bytes);
-    return dev_ptr;
+    return alloc_->allocate(num_bytes);
 }
 
 
@@ -298,98 +279,4 @@ void CudaExecutor::init_handles()
 }
 
 
-cuda_stream::cuda_stream(int device_id) : stream_{}, device_id_(device_id)
-{
-    detail::cuda_scoped_device_id_guard g(device_id_);
-    GKO_ASSERT_NO_CUDA_ERRORS(cudaStreamCreate(&stream_));
-}
-
-
-cuda_stream::~cuda_stream()
-{
-    if (stream_) {
-        detail::cuda_scoped_device_id_guard g(device_id_);
-        cudaStreamDestroy(stream_);
-    }
-}
-
-
-cuda_stream::cuda_stream(cuda_stream&& other)
-    : stream_{std::exchange(other.stream_, nullptr)},
-      device_id_(std::exchange(other.device_id_, -1))
-{}
-
-
-CUstream_st* cuda_stream::get() const { return stream_; }
-
-
-namespace log {
-
-
-// "GKO" in ASCII to avoid collision with other application's categories
-constexpr static uint32 category_magic_offset = 0x676B6FU;
-
-
-void init_nvtx()
-{
-#define NAMED_CATEGORY(_name)                                            \
-    nvtxNameCategory(static_cast(profile_event_category::_name) +       \
-                         category_magic_offset,                         \
-                     "gko::" #_name)
-    NAMED_CATEGORY(memory);
-    NAMED_CATEGORY(operation);
-    NAMED_CATEGORY(object);
-    NAMED_CATEGORY(linop);
-    NAMED_CATEGORY(factory);
-    NAMED_CATEGORY(solver);
-    NAMED_CATEGORY(criterion);
-    NAMED_CATEGORY(user);
-    NAMED_CATEGORY(internal);
-#undef NAMED_CATEGORY
-}
-
-
-std::function begin_nvtx_fn(
-    uint32_t color_argb)
-{
-    return [color_argb](const char* name, profile_event_category category) {
-        nvtxEventAttributes_t attr{};
-        attr.version = NVTX_VERSION;
-        attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
-        attr.category = static_cast(category) + category_magic_offset;
-        attr.colorType = NVTX_COLOR_ARGB;
-        attr.color = color_argb;
-        attr.payloadType = NVTX_PAYLOAD_UNKNOWN;
-        attr.messageType = NVTX_MESSAGE_TYPE_ASCII;
-        attr.message.ascii = name;
-        nvtxRangePushEx(&attr);
-    };
-}
-
-
-void end_nvtx(const char* name, profile_event_category) { nvtxRangePop(); }
-
-
-}  // namespace log
-
-
-namespace kernels {
-namespace cuda {
-
-
-void reset_device(int device_id)
-{
-    gko::detail::cuda_scoped_device_id_guard guard{device_id};
-    cudaDeviceReset();
-}
-
-
-void destroy_event(CUevent_st* event)
-{
-    GKO_ASSERT_NO_CUDA_ERRORS(cudaEventDestroy(event));
-}
-
-
-}  // namespace cuda
-}  // namespace kernels
 }  // namespace gko
diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp
new file mode 100644
index 00000000000..11dee81ad42
--- /dev/null
+++ b/cuda/base/memory.cpp
@@ -0,0 +1,168 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/ + +#include + + +#include + + +#include + + +#include "cuda/base/scoped_device_id.hpp" + + +namespace gko { + + +#define GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(_operation, _size) \ + { \ + auto error_code = _operation; \ + if (error_code == cudaErrorMemoryAllocation) { \ + throw AllocationError(__FILE__, __LINE__, "cuda", _size); \ + } else { \ + GKO_ASSERT_NO_CUDA_ERRORS(error_code); \ + } \ + } + + +#if GKO_VERBOSE_LEVEL >= 1 +#define GKO_EXIT_ON_CUDA_ERROR(_operation) \ + { \ + const auto error_code = _operation; \ + if (error_code != cudaSuccess) { \ + int device_id{-1}; \ + cudaGetDevice(&device_id); \ + std::cerr << "Unrecoverable CUDA error on device " << device_id \ + << " in " << __func__ << ":" << __LINE__ << ": " \ + << cudaGetErrorName(error_code) << ": " \ + << cudaGetErrorString(error_code) << std::endl \ + << "Exiting program" << std::endl; \ + std::exit(error_code); \ + } \ + } +#else +#define GKO_EXIT_ON_CUDA_ERROR(_operation) \ + { \ + const auto error_code = _operation; \ + if (error_code != cudaSuccess) { \ + std::exit(error_code); \ + } \ + } +#endif + + +void* CudaAllocator::allocate(size_type num_bytes) const +{ + void* ptr{}; + GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes), + num_bytes); + return ptr; +} + + +void CudaAllocator::deallocate(void* ptr) const +{ + GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr)); +} + + +CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {} + + +void* CudaAsyncAllocator::allocate(size_type num_bytes) const +{ + void* ptr{}; + GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS( + cudaMallocAsync(&ptr, num_bytes, stream_), num_bytes); + return ptr; +} + +void CudaAsyncAllocator::deallocate(void* ptr) const +{ + GKO_EXIT_ON_CUDA_ERROR(cudaFreeAsync(ptr, stream_)); +} + + +CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id) + : CudaUnifiedAllocator{device_id, cudaMemAttachGlobal} +{} + + +CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags) + : device_id_{device_id}, flags_{flags} +{} + + +void* CudaUnifiedAllocator::allocate(size_type num_bytes) const +{ + // we need to set the device ID in case this gets used in a host executor + detail::cuda_scoped_device_id_guard g(device_id_); + void* ptr{}; + GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS( + cudaMallocManaged(&ptr, num_bytes, flags_), num_bytes); + return ptr; +} + + +void CudaUnifiedAllocator::deallocate(void* ptr) const +{ + // we need to set the device ID in case this gets used in a host executor + detail::cuda_scoped_device_id_guard g(device_id_); + GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr)); +} + + +CudaHostAllocator::CudaHostAllocator(int device_id) : device_id_{device_id} {} + + +void* CudaHostAllocator::allocate(size_type num_bytes) const +{ + // we need to set the device ID in case this gets used in a host executor + detail::cuda_scoped_device_id_guard g(device_id_); + void* ptr{}; + GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMallocHost(&ptr, num_bytes), + num_bytes); + return ptr; +} + + +void CudaHostAllocator::deallocate(void* ptr) const +{ + // we need to set the device ID in case this gets used in a host executor + detail::cuda_scoped_device_id_guard g(device_id_); + GKO_EXIT_ON_CUDA_ERROR(cudaFreeHost(ptr)); +} + + +} // namespace gko \ No newline at end of file diff --git a/cuda/base/nvtx.cpp b/cuda/base/nvtx.cpp new file mode 100644 index 00000000000..e313c110ea2 --- /dev/null +++ b/cuda/base/nvtx.cpp @@ -0,0 +1,96 @@ 
+/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#ifdef GKO_LEGACY_NVTX +#include +#else +#include +#endif + + +#include + + +namespace gko { +namespace log { + + +// "GKO" in ASCII to avoid collision with other application's categories +constexpr static uint32 category_magic_offset = 0x676B6FU; + + +void init_nvtx() +{ +#define NAMED_CATEGORY(_name) \ + nvtxNameCategory(static_cast(profile_event_category::_name) + \ + category_magic_offset, \ + "gko::" #_name) + NAMED_CATEGORY(memory); + NAMED_CATEGORY(operation); + NAMED_CATEGORY(object); + NAMED_CATEGORY(linop); + NAMED_CATEGORY(factory); + NAMED_CATEGORY(solver); + NAMED_CATEGORY(criterion); + NAMED_CATEGORY(user); + NAMED_CATEGORY(internal); +#undef NAMED_CATEGORY +} + + +std::function begin_nvtx_fn( + uint32_t color_argb) +{ + return [color_argb](const char* name, profile_event_category category) { + nvtxEventAttributes_t attr{}; + attr.version = NVTX_VERSION; + attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + attr.category = static_cast(category) + category_magic_offset; + attr.colorType = NVTX_COLOR_ARGB; + attr.color = color_argb; + attr.payloadType = NVTX_PAYLOAD_UNKNOWN; + attr.messageType = NVTX_MESSAGE_TYPE_ASCII; + attr.message.ascii = name; + nvtxRangePushEx(&attr); + }; +} + + +void end_nvtx(const char* name, profile_event_category) { nvtxRangePop(); } + + +} // namespace log +} // namespace gko diff --git a/cuda/test/base/cuda_executor_reset.cpp b/cuda/base/stream.cpp similarity index 62% rename from cuda/test/base/cuda_executor_reset.cpp rename to cuda/base/stream.cpp index c8159b9c4d7..8c6aa92c28b 100644 --- a/cuda/test/base/cuda_executor_reset.cpp +++ b/cuda/base/stream.cpp @@ -30,58 +30,45 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#include +#include -#include +#include +#include -#include +#include "cuda/base/scoped_device_id.hpp" -namespace { +namespace gko { -#define GTEST_ASSERT_NO_EXIT(statement) \ - ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "") +cuda_stream::cuda_stream() : stream_{}, device_id_{-1} {} -TEST(DeviceReset, HipCuda) +cuda_stream::cuda_stream(int device_id) : stream_{}, device_id_(device_id) { - GTEST_ASSERT_NO_EXIT({ - auto ref = gko::ReferenceExecutor::create(); - auto hip = gko::HipExecutor::create(0, ref, true); - auto cuda = gko::CudaExecutor::create(0, ref, true); - }); + detail::cuda_scoped_device_id_guard g(device_id_); + GKO_ASSERT_NO_CUDA_ERRORS(cudaStreamCreate(&stream_)); } -TEST(DeviceReset, CudaHip) +cuda_stream::~cuda_stream() { - GTEST_ASSERT_NO_EXIT({ - auto ref = gko::ReferenceExecutor::create(); - auto cuda = gko::CudaExecutor::create(0, ref, true); - auto hip = gko::HipExecutor::create(0, ref, true); - }); + if (stream_) { + detail::cuda_scoped_device_id_guard g(device_id_); + cudaStreamDestroy(stream_); + } } -void func() -{ - auto ref = gko::ReferenceExecutor::create(); - auto exec = gko::CudaExecutor::create(0, ref, true); -} +cuda_stream::cuda_stream(cuda_stream&& other) + : stream_{std::exchange(other.stream_, nullptr)}, + device_id_(std::exchange(other.device_id_, -1)) +{} -TEST(DeviceReset, CudaCuda) -{ - GTEST_ASSERT_NO_EXIT({ - std::thread t1(func); - std::thread t2(func); - t1.join(); - t2.join(); - }); -} +CUstream_st* cuda_stream::get() const { return stream_; } -} // namespace +} // namespace gko diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index 9be3caf9faa..a213e65277a 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,7 +1,6 @@ ginkgo_create_cuda_test(array) ginkgo_create_cuda_test(cuda_executor) ginkgo_create_test(index_set) -ginkgo_create_test(cuda_executor_reset ADDITIONAL_LIBRARIES Threads::Threads) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_cuda_test(cuda_executor_topology ADDITIONAL_LIBRARIES NUMA::NUMA) @@ -10,4 +9,5 @@ ginkgo_create_cuda_test(exception_helpers) ginkgo_create_cuda_test(kernel_launch) ginkgo_create_cuda_test(lin_op) ginkgo_create_cuda_test(math) +ginkgo_create_test(memory) ginkgo_create_cuda_test(scoped_device_id) diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu index 5f489ac22f0..afb23c06186 100644 --- a/cuda/test/base/cuda_executor.cu +++ b/cuda/test/base/cuda_executor.cu @@ -42,6 +42,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include + #include "common/cuda_hip/base/executor.hpp.inc" #include "cuda/base/scoped_device_id.hpp" @@ -103,18 +105,19 @@ protected: ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); #ifdef GKO_TEST_NONDEFAULT_STREAM cuda = gko::CudaExecutor::create( - 0, omp, false, gko::default_cuda_alloc_mode, stream.get()); + 0, omp, std::make_shared(), stream.get()); cuda2 = gko::CudaExecutor::create( - gko::CudaExecutor::get_num_devices() - 1, omp, false, - gko::default_cuda_alloc_mode, other_stream.get()); + gko::CudaExecutor::get_num_devices() - 1, omp, + std::make_shared(), other_stream.get()); cuda3 = gko::CudaExecutor::create( - 0, omp, false, gko::allocation_mode::unified_global, stream.get()); + 0, omp, std::make_shared(0), + stream.get()); #else cuda = gko::CudaExecutor::create(0, omp); cuda2 = gko::CudaExecutor::create( gko::CudaExecutor::get_num_devices() - 1, omp); - cuda3 = gko::CudaExecutor::create(0, omp, false, - gko::allocation_mode::unified_global); + cuda3 = gko::CudaExecutor::create( + 0, omp, std::make_shared(0)); #endif } diff --git a/cuda/test/base/memory.cpp b/cuda/test/base/memory.cpp new file mode 100644 index 00000000000..a329817f4af --- /dev/null +++ b/cuda/test/base/memory.cpp @@ -0,0 +1,126 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include +#include +#include + + +#include "cuda/test/utils.hpp" + + +namespace { + + +class Memory : public CudaTestFixture { +protected: + Memory() + : host_exec_with_pinned{gko::OmpExecutor::create( + std::make_shared(0))}, + host_exec_with_unified{gko::OmpExecutor::create( + std::make_shared(0))}, + exec_with_normal{gko::CudaExecutor::create( + 0, ref, std::make_shared(), + exec->get_stream())}, + exec_with_async{gko::CudaExecutor::create( + 0, host_exec_with_pinned, + std::make_shared(exec->get_stream()), + exec->get_stream())}, + exec_with_unified{gko::CudaExecutor::create( + 0, host_exec_with_unified, + std::make_shared(0), + exec->get_stream())} + {} + + std::shared_ptr host_exec_with_pinned; + std::shared_ptr host_exec_with_unified; + std::shared_ptr exec_with_normal; + std::shared_ptr exec_with_async; + std::shared_ptr exec_with_unified; +}; + + +TEST_F(Memory, DeviceAllocationWorks) +{ + gko::array data{exec_with_normal, {1, 2}}; + + GKO_ASSERT_ARRAY_EQ(data, I({1, 2})); +} + + +TEST_F(Memory, AsyncDeviceAllocationWorks) +{ + gko::array data{exec_with_async, {1, 2}}; + + GKO_ASSERT_ARRAY_EQ(data, I({1, 2})); +} + + +TEST_F(Memory, UnifiedDeviceAllocationWorks) +{ + gko::array data{exec_with_unified, {1, 2}}; + exec->synchronize(); + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +TEST_F(Memory, HostUnifiedAllocationWorks) +{ + gko::array data{host_exec_with_unified, {1, 2}}; + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +TEST_F(Memory, HostPinnedAllocationWorks) +{ + gko::array data{host_exec_with_pinned, {1, 2}}; + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +} // namespace diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp index 814405ba0d9..e1156b91903 100644 --- a/cuda/test/utils.hpp +++ b/cuda/test/utils.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include "cuda/base/device.hpp" @@ -60,8 +61,9 @@ class CudaTestFixture : public ::testing::Test { CudaTestFixture() : ref(gko::ReferenceExecutor::create()), #ifdef GKO_TEST_NONDEFAULT_STREAM + stream(0), exec(gko::CudaExecutor::create( - 0, ref, false, gko::default_cuda_alloc_mode, stream.get())) + 0, ref, std::make_shared(), stream.get())) #else exec(gko::CudaExecutor::create(0, ref)) #endif diff --git a/devices/cuda/executor.cpp b/devices/cuda/executor.cpp index d066d272f81..3789274c3f3 100644 --- a/devices/cuda/executor.cpp +++ b/devices/cuda/executor.cpp @@ -64,31 +64,4 @@ bool CudaExecutor::verify_memory_to(const HipExecutor* dest_exec) const } -void CudaExecutor::increase_num_execs(unsigned device_id) -{ -#ifdef GKO_COMPILING_CUDA_DEVICE - // increase the Cuda Device count only when ginkgo build cuda - std::lock_guard guard(nvidia_device::get_mutex(device_id)); - nvidia_device::get_num_execs(device_id)++; -#endif // GKO_COMPILING_CUDA_DEVICE -} - - -void CudaExecutor::decrease_num_execs(unsigned device_id) -{ -#ifdef GKO_COMPILING_CUDA_DEVICE - // increase the Cuda Device count only when ginkgo build cuda - std::lock_guard guard(nvidia_device::get_mutex(device_id)); - nvidia_device::get_num_execs(device_id)--; -#endif // GKO_COMPILING_CUDA_DEVICE -} - - -unsigned CudaExecutor::get_num_execs(unsigned device_id) -{ - std::lock_guard guard(nvidia_device::get_mutex(device_id)); - return nvidia_device::get_num_execs(device_id); -} - - } // namespace gko diff --git a/devices/hip/executor.cpp b/devices/hip/executor.cpp index 60efb4c53a3..b044074c19e 100644 --- a/devices/hip/executor.cpp +++ b/devices/hip/executor.cpp @@ -61,38 +61,4 @@ bool HipExecutor::verify_memory_to(const CudaExecutor* dest_exec) const } -#if (GINKGO_HIP_PLATFORM_NVCC == 1) -using hip_device_class = nvidia_device; -#else -using hip_device_class = amd_device; -#endif - - -void HipExecutor::increase_num_execs(int device_id) -{ -#ifdef GKO_COMPILING_HIP_DEVICE - // increase the HIP Device count only when ginkgo build hip - std::lock_guard guard(hip_device_class::get_mutex(device_id)); - hip_device_class::get_num_execs(device_id)++; -#endif // GKO_COMPILING_HIP_DEVICE -} - - -void HipExecutor::decrease_num_execs(int device_id) -{ -#ifdef GKO_COMPILING_HIP_DEVICE - // increase the HIP Device count only when ginkgo build hip - std::lock_guard guard(hip_device_class::get_mutex(device_id)); - hip_device_class::get_num_execs(device_id)--; -#endif // GKO_COMPILING_HIP_DEVICE -} - - -int HipExecutor::get_num_execs(int device_id) -{ - std::lock_guard guard(hip_device_class::get_mutex(device_id)); - return hip_device_class::get_num_execs(device_id); -} - - } // namespace gko diff --git a/devices/omp/executor.cpp b/devices/omp/executor.cpp index 352216f7633..f8e700bc2d5 100644 --- a/devices/omp/executor.cpp +++ b/devices/omp/executor.cpp @@ -55,7 +55,10 @@ void OmpExecutor::populate_exec_info(const machine_topology* mach_topo) } -void OmpExecutor::raw_free(void* ptr) const noexcept { std::free(ptr); } +void OmpExecutor::raw_free(void* ptr) const noexcept +{ + return alloc_->deallocate(ptr); +} std::shared_ptr OmpExecutor::get_master() noexcept @@ -72,7 +75,7 @@ std::shared_ptr OmpExecutor::get_master() const noexcept void* OmpExecutor::raw_alloc(size_type num_bytes) const { - return GKO_ENSURE_ALLOCATED(std::malloc(num_bytes), "OMP", num_bytes); + return alloc_->allocate(num_bytes); } diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index c2015c8664c..d668331a43b 100644 --- 
a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -51,6 +51,39 @@ namespace gko { namespace detail { +DpcppAllocator::DpcppAllocator(sycl::queue* queue) : queue_{queue} {} + + +void* DpcppAllocator::allocate(size_type size) +{ + return sycl::malloc_device(size, *queue_); +} + + +void DpcppAllocator::deallocate(void* ptr) +{ + queue_->wait_and_throw(); + sycl::free(ptr, queue_->get_context()); +} + + +DpcppUnifiedAllocator::DpcppUnifiedAllocator(sycl::queue* queue) : queue_{queue} +{} + + +void* DpcppUnifiedAllocator::allocate(size_type size) +{ + return sycl::malloc_shared(size, *queue_); +} + + +void DpcppUnifiedAllocator::deallocate(void* ptr) +{ + queue_->wait_and_throw(); + sycl::free(ptr, queue_->get_context()); +} + + const std::vector get_devices(std::string device_type) { std::map device_type_map{ diff --git a/hip/test/base/hip_executor_reset.cpp b/dpcpp/base/memory.dp.cpp similarity index 63% rename from hip/test/base/hip_executor_reset.cpp rename to dpcpp/base/memory.dp.cpp index 39e3252e053..b1ccd007dea 100644 --- a/hip/test/base/hip_executor_reset.cpp +++ b/dpcpp/base/memory.dp.cpp @@ -30,58 +30,43 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include -#include +#include -#include +namespace gko { -namespace { +DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue* queue) : queue_{queue} {} -#define GTEST_ASSERT_NO_EXIT(statement) \ - ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "") - - -TEST(DeviceReset, HipCuda) +void* DpcppAllocator::allocate_impl(sycl::queue* queue, + size_type num_bytes) const { - GTEST_ASSERT_NO_EXIT({ - auto ref = gko::ReferenceExecutor::create(); - auto hip = gko::HipExecutor::create(0, ref, true); - auto cuda = gko::CudaExecutor::create(0, ref, true); - }); + return sycl::malloc_device(num_bytes, *queue); } -TEST(DeviceReset, CudaHip) +void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const { - GTEST_ASSERT_NO_EXIT({ - auto ref = gko::ReferenceExecutor::create(); - auto cuda = gko::CudaExecutor::create(0, ref, true); - auto hip = gko::HipExecutor::create(0, ref, true); - }); + queue->wait_and_throw(); + sycl::free(ptr, queue->get_context()); } -void func() +void* DpcppUnifiedAllocator::allocate(size_type num_bytes) { - auto ref = gko::ReferenceExecutor::create(); - auto exec = gko::HipExecutor::create(0, ref, true); + return sycl::malloc_shared(num_bytes, *queue_); } -TEST(DeviceReset, HipHip) +void DpcppUnifiedAllocator::deallocate(void* ptr) { - GTEST_ASSERT_NO_EXIT({ - std::thread t1(func); - std::thread t2(func); - t1.join(); - t2.join(); - }); + queue_->wait_and_throw(); + sycl::free(ptr, queue_->get_context()); } -} // namespace +} // namespace gko diff --git a/dpcpp/test/base/CMakeLists.txt b/dpcpp/test/base/CMakeLists.txt index bb9c8a75050..5c0ca601f04 100644 --- a/dpcpp/test/base/CMakeLists.txt +++ b/dpcpp/test/base/CMakeLists.txt @@ -3,3 +3,4 @@ ginkgo_create_dpcpp_test(dim3) ginkgo_create_dpcpp_test(kernel_launch) # set correct flags for kernel_launch.hpp target_compile_definitions(dpcpp_test_base_kernel_launch PRIVATE GKO_COMPILING_DPCPP) +ginkgo_create_dpcpp_test(memory) \ No newline at end of file diff --git a/dpcpp/test/base/memory.dp.cpp b/dpcpp/test/base/memory.dp.cpp new file mode 100644 index 00000000000..e587660cde3 --- /dev/null +++ b/dpcpp/test/base/memory.dp.cpp @@ -0,0 +1,98 @@
+/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include +#include +#include + + +#include "dpcpp/test/utils.hpp" + + +namespace { + + +class Memory : public ::testing::Test { +protected: + Memory() + : exec{gko::DpcppExecutor::create(0, gko::OmpExecutor::create())}, + host_exec_with_unified{gko::OmpExecutor::create( + std::make_shared(exec->get_queue()))}, + exec_with_unified{gko::DpcppExecutor::create( + exec->get_queue(), host_exec_with_unified, + std::make_shared(exec->get_queue()))} + {} + + std::shared_ptr exec; + std::shared_ptr host_exec_with_unified; + std::shared_ptr exec_with_unified; +}; + + +TEST_F(Memory, DeviceAllocationWorks) +{ + gko::array data{exec, {1, 2}}; + + GKO_ASSERT_ARRAY_EQ(data, I({1, 2})); +} + + +TEST_F(Memory, UnifiedDeviceAllocationWorks) +{ + gko::array data{exec_with_unified, {1, 2}}; + exec->synchronize(); + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +TEST_F(Memory, HostUnifiedAllocationWorks) +{ + gko::array data{host_exec_with_unified, {1, 2}}; + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +} // namespace diff --git a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp index b300292e9a3..79b197aacc8 100644 --- a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp +++ b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp @@ -68,13 +68,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return 
gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/cb-gmres/cb-gmres.cpp b/examples/cb-gmres/cb-gmres.cpp index c0235f75e55..b096e48c71a 100644 --- a/examples/cb-gmres/cb-gmres.cpp +++ b/examples/cb-gmres/cb-gmres.cpp @@ -108,13 +108,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/custom-logger/custom-logger.cpp b/examples/custom-logger/custom-logger.cpp index c2270cadb0d..7e6cf531edd 100644 --- a/examples/custom-logger/custom-logger.cpp +++ b/examples/custom-logger/custom-logger.cpp @@ -249,13 +249,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/custom-matrix-format/custom-matrix-format.cpp b/examples/custom-matrix-format/custom-matrix-format.cpp index af08dbdf226..4610413fe9c 100644 --- a/examples/custom-matrix-format/custom-matrix-format.cpp +++ b/examples/custom-matrix-format/custom-matrix-format.cpp @@ -255,13 +255,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp index 9389f86cc45..e07f1bf92fb 100644 --- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp +++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp @@ -158,13 +158,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp index aa32e0e879a..33946b7de44 100644 --- a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp +++ b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp @@ -68,13 +68,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + 
return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/inverse-iteration/inverse-iteration.cpp b/examples/inverse-iteration/inverse-iteration.cpp index 5d8270f1ca1..460370b7e00 100644 --- a/examples/inverse-iteration/inverse-iteration.cpp +++ b/examples/inverse-iteration/inverse-iteration.cpp @@ -72,13 +72,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp index e676e15cc6d..407a083e548 100644 --- a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp +++ b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp @@ -71,13 +71,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp index cbd2156be60..14384eaab52 100644 --- a/examples/iterative-refinement/iterative-refinement.cpp +++ b/examples/iterative-refinement/iterative-refinement.cpp @@ -68,13 +68,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/minimal-cuda-solver/minimal-cuda-solver.cpp b/examples/minimal-cuda-solver/minimal-cuda-solver.cpp index 7182bc9ad8c..5a7a8c086af 100644 --- a/examples/minimal-cuda-solver/minimal-cuda-solver.cpp +++ b/examples/minimal-cuda-solver/minimal-cuda-solver.cpp @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
int main() { // Instantiate a CUDA executor - auto gpu = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); + auto gpu = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); // Read data auto A = gko::read>(std::cin, gpu); auto b = gko::read>(std::cin, gpu); diff --git a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp index 6f1600d2805..9edd7ff29a1 100644 --- a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp +++ b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp @@ -71,13 +71,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp index d3f45cda916..cbecbbbdc02 100644 --- a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp +++ b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp @@ -69,13 +69,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/mixed-precision-ir/mixed-precision-ir.cpp b/examples/mixed-precision-ir/mixed-precision-ir.cpp index 3510a2163e1..0882d755cdc 100644 --- a/examples/mixed-precision-ir/mixed-precision-ir.cpp +++ b/examples/mixed-precision-ir/mixed-precision-ir.cpp @@ -76,13 +76,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/mixed-spmv/mixed-spmv.cpp b/examples/mixed-spmv/mixed-spmv.cpp index 78461de39ef..6b327c1c708 100644 --- a/examples/mixed-spmv/mixed-spmv.cpp +++ b/examples/mixed-spmv/mixed-spmv.cpp @@ -170,13 +170,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp index 6f75ca29630..a455ca2e8ed 100644 --- 
a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp +++ b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp @@ -64,13 +64,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp index 7f47d039072..75c03259c67 100644 --- a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp +++ b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp @@ -62,13 +62,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp index 51fdf97d4a4..05ee0503a5f 100644 --- a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp +++ b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp @@ -230,13 +230,12 @@ void solve_system(const std::string& executor_string, {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/papi-logging/papi-logging.cpp b/examples/papi-logging/papi-logging.cpp index 0d81ef65909..1ae2ae9ec08 100644 --- a/examples/papi-logging/papi-logging.cpp +++ b/examples/papi-logging/papi-logging.cpp @@ -151,13 +151,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/performance-debugging/performance-debugging.cpp b/examples/performance-debugging/performance-debugging.cpp index f357a8d4619..5f036728924 100644 --- a/examples/performance-debugging/performance-debugging.cpp +++ b/examples/performance-debugging/performance-debugging.cpp @@ -371,13 +371,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return 
gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/poisson-solver/poisson-solver.cpp b/examples/poisson-solver/poisson-solver.cpp index 7602600a514..e16f0b26968 100644 --- a/examples/poisson-solver/poisson-solver.cpp +++ b/examples/poisson-solver/poisson-solver.cpp @@ -144,13 +144,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/preconditioned-solver/preconditioned-solver.cpp b/examples/preconditioned-solver/preconditioned-solver.cpp index 37963f205cc..b64b588c4ef 100644 --- a/examples/preconditioned-solver/preconditioned-solver.cpp +++ b/examples/preconditioned-solver/preconditioned-solver.cpp @@ -69,13 +69,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/simple-solver-logging/simple-solver-logging.cpp b/examples/simple-solver-logging/simple-solver-logging.cpp index 6aa85462605..02318dd7784 100644 --- a/examples/simple-solver-logging/simple-solver-logging.cpp +++ b/examples/simple-solver-logging/simple-solver-logging.cpp @@ -85,13 +85,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/simple-solver/simple-solver.cpp b/examples/simple-solver/simple-solver.cpp index 8f665f98496..81dc9ee6d74 100644 --- a/examples/simple-solver/simple-solver.cpp +++ b/examples/simple-solver/simple-solver.cpp @@ -89,13 +89,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp index 6bf3cc21a8a..63adfaa5571 100644 --- a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp +++ b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp @@ -165,13 +165,12 @@ void solve_system(const std::string& executor_string, {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, 
gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 6c6fc235f45..61b06ad4058 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,9 +1,13 @@ set(GINKGO_HIP_SOURCES + base/device.hip.cpp base/device_matrix_data_kernels.hip.cpp base/exception.hip.cpp base/executor.hip.cpp base/index_set_kernels.hip.cpp + base/memory.hip.cpp + base/roctx.hip.cpp base/scoped_device_id.hip.cpp + base/stream.hip.cpp base/timer.hip.cpp base/version.hip.cpp components/prefix_sum_kernels.hip.cpp diff --git a/hip/base/device.hip.cpp b/hip/base/device.hip.cpp new file mode 100644 index 00000000000..b5ec1bec6d6 --- /dev/null +++ b/hip/base/device.hip.cpp @@ -0,0 +1,67 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +#include "hip/base/scoped_device_id.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { + + +void reset_device(int device_id) +{ + gko::detail::hip_scoped_device_id_guard guard{device_id}; + hipDeviceReset(); +} + + +void destroy_event(GKO_HIP_EVENT_STRUCT* event) +{ + GKO_ASSERT_NO_HIP_ERRORS(hipEventDestroy(event)); +} + + +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index cd8a485c19d..6b4b0fd5ddc 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -37,15 +37,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX -#include -#endif #include #include #include -#include #include "hip/base/config.hip.hpp" @@ -60,32 +56,22 @@ namespace gko { #include "common/cuda_hip/base/executor.hpp.inc" -#if (GINKGO_HIP_PLATFORM_NVCC == 1) -using hip_device_class = nvidia_device; -#else -using hip_device_class = amd_device; -#endif - - std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, hipStream_t stream) { return std::shared_ptr( - new HipExecutor(device_id, std::move(master), device_reset, alloc_mode, - stream), - [device_id](HipExecutor* exec) { - auto device_reset = exec->get_device_reset(); - std::lock_guard guard( - hip_device_class::get_mutex(device_id)); - delete exec; - auto& num_execs = hip_device_class::get_num_execs(device_id); - num_execs--; - if (!num_execs && device_reset) { - detail::hip_scoped_device_id_guard g(device_id); - hipDeviceReset(); - } - }); + new HipExecutor(device_id, std::move(master), + std::make_shared(), stream)); +} + + +std::shared_ptr HipExecutor::create( + int device_id, std::shared_ptr master, + std::shared_ptr alloc, hipStream_t stream) +{ + return std::shared_ptr(new HipExecutor( + device_id, std::move(master), std::move(alloc), stream)); } @@ -125,42 +111,14 @@ void OmpExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes, void HipExecutor::raw_free(void* ptr) const noexcept { detail::hip_scoped_device_id_guard g(this->get_device_id()); - auto error_code = hipFree(ptr); - if (error_code != hipSuccess) { -#if GKO_VERBOSE_LEVEL >= 1 - // Unfortunately, if memory free fails, there's not much we can do - std::cerr << "Unrecoverable HIP error on device " - << this->get_device_id() << " in " << __func__ << ": " - << hipGetErrorName(error_code) << ": " - << hipGetErrorString(error_code) << std::endl - << "Exiting program" << std::endl; -#endif // GKO_VERBOSE_LEVEL >= 1 - std::exit(error_code); - } + alloc_->deallocate(ptr); } void* HipExecutor::raw_alloc(size_type num_bytes) const { - void* dev_ptr = nullptr; detail::hip_scoped_device_id_guard g(this->get_device_id()); - int error_code = 0; - if (this->alloc_mode_ == allocation_mode::device) { - error_code = hipMalloc(&dev_ptr, num_bytes); -#if !(GKO_HIP_PLATFORM_HCC == 1) - } else if (this->alloc_mode_ == allocation_mode::unified_global) { - error_code = hipMallocManaged(&dev_ptr, num_bytes, hipMemAttachGlobal); - } else if (this->alloc_mode_ == allocation_mode::unified_host) { - error_code = hipMallocManaged(&dev_ptr, num_bytes, hipMemAttachHost); -#endif - } else { - GKO_NOT_SUPPORTED(this->alloc_mode_); - } - if (error_code != hipErrorMemoryAllocation) { - GKO_ASSERT_NO_HIP_ERRORS(error_code); - } - GKO_ENSURE_ALLOCATED(dev_ptr, "hip", num_bytes); - return dev_ptr; + return alloc_->allocate(num_bytes); } @@ -309,73 +267,4 @@ void HipExecutor::init_handles() } -hip_stream::hip_stream(int device_id) : stream_{}, device_id_(device_id) -{ - detail::hip_scoped_device_id_guard g(device_id_); - GKO_ASSERT_NO_HIP_ERRORS(hipStreamCreate(&stream_)); -} - - -hip_stream::~hip_stream() -{ - if (stream_) { - detail::hip_scoped_device_id_guard g(device_id_); - hipStreamDestroy(stream_); - } -} - - -hip_stream::hip_stream(hip_stream&& other) - : stream_{std::exchange(other.stream_, nullptr)}, - device_id_{std::exchange(other.device_id_, -1)} -{} - - -GKO_HIP_STREAM_STRUCT* hip_stream::get() const { return stream_; } - - -namespace log { - - -#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX - -void 
begin_roctx(const char* name, profile_event_category) -{ - roctxRangePush(name); -} - -void end_roctx(const char*, profile_event_category) { roctxRangePop(); } - -#else - -void begin_roctx(const char* name, profile_event_category) - GKO_NOT_COMPILED(roctx); - -void end_roctx(const char*, profile_event_category) GKO_NOT_COMPILED(roctx); - -#endif - - -} // namespace log - - -namespace kernels { -namespace hip { - - -void reset_device(int device_id) -{ - gko::detail::hip_scoped_device_id_guard guard{device_id}; - hipDeviceReset(); -} - - -void destroy_event(GKO_HIP_EVENT_STRUCT* event) -{ - GKO_ASSERT_NO_HIP_ERRORS(hipEventDestroy(event)); -} - - -} // namespace hip -} // namespace kernels } // namespace gko diff --git a/hip/base/memory.hip.cpp b/hip/base/memory.hip.cpp new file mode 100644 index 00000000000..f2a8977525f --- /dev/null +++ b/hip/base/memory.hip.cpp @@ -0,0 +1,97 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +namespace gko { + + +#define GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(_operation, _size) \ + { \ + auto error_code = _operation; \ + if (error_code == hipErrorMemoryAllocation) { \ + throw AllocationError(__FILE__, __LINE__, "hip", _size); \ + } else { \ + GKO_ASSERT_NO_HIP_ERRORS(error_code); \ + } \ + } + + +#if GKO_VERBOSE_LEVEL >= 1 +#define GKO_EXIT_ON_HIP_ERROR(_operation) \ + { \ + const auto error_code = _operation; \ + if (error_code != hipSuccess) { \ + int device_id{-1}; \ + hipGetDevice(&device_id); \ + std::cerr << "Unrecoverable HIP error on device " << device_id \ + << " in " << __func__ << ": " \ + << hipGetErrorName(error_code) << ": " \ + << hipGetErrorString(error_code) << std::endl \ + << "Exiting program" << std::endl; \ + std::exit(error_code); \ + } \ + } +#else +#define GKO_EXIT_ON_HIP_ERROR(_operation) \ + { \ + const auto error_code = _operation; \ + if (error_code != hipSuccess) { \ + std::exit(error_code); \ + } \ + } +#endif + + +void* HipAllocator::allocate(size_type num_bytes) const +{ + void* dev_ptr{}; + GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(hipMalloc(&dev_ptr, num_bytes), + num_bytes); + return dev_ptr; +} + + +void HipAllocator::deallocate(void* dev_ptr) const +{ + GKO_EXIT_ON_HIP_ERROR(hipFree(dev_ptr)); +} + + +} // namespace gko diff --git a/hip/base/roctx.hip.cpp b/hip/base/roctx.hip.cpp new file mode 100644 index 00000000000..9f309b93362 --- /dev/null +++ b/hip/base/roctx.hip.cpp @@ -0,0 +1,70 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX +#include +#endif + + +#include +#include + + +namespace gko { +namespace log { + + +#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX + +void begin_roctx(const char* name, profile_event_category) +{ + roctxRangePush(name); +} + +void end_roctx(const char*, profile_event_category) { roctxRangePop(); } + +#else + +void begin_roctx(const char* name, profile_event_category) + GKO_NOT_COMPILED(roctx); + +void end_roctx(const char*, profile_event_category) GKO_NOT_COMPILED(roctx); + +#endif + + +} // namespace log +} // namespace gko diff --git a/hip/base/stream.hip.cpp b/hip/base/stream.hip.cpp new file mode 100644 index 00000000000..e5817eb9ebd --- /dev/null +++ b/hip/base/stream.hip.cpp @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +#include "hip/base/scoped_device_id.hip.hpp" + + +namespace gko { + + +hip_stream::hip_stream() : stream_{}, device_id_{-1} {} + + +hip_stream::hip_stream(int device_id) : stream_{}, device_id_(device_id) +{ + detail::hip_scoped_device_id_guard g(device_id_); + GKO_ASSERT_NO_HIP_ERRORS(hipStreamCreate(&stream_)); +} + + +hip_stream::~hip_stream() +{ + if (stream_) { + detail::hip_scoped_device_id_guard g(device_id_); + hipStreamDestroy(stream_); + } +} + + +hip_stream::hip_stream(hip_stream&& other) + : stream_{std::exchange(other.stream_, nullptr)}, + device_id_{std::exchange(other.device_id_, -1)} +{} + + +GKO_HIP_STREAM_STRUCT* hip_stream::get() const { return stream_; } + + +} // namespace gko diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index 7ed0d2ceb52..f597a3d6e3d 100644 --- a/hip/test/base/CMakeLists.txt +++ b/hip/test/base/CMakeLists.txt @@ -1,6 +1,5 @@ ginkgo_create_hip_test(hip_executor) ginkgo_create_test(index_set) -ginkgo_create_test(hip_executor_reset ADDITIONAL_LIBRARIES Threads::Threads) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_hip_test(hip_executor_topology ADDITIONAL_LIBRARIES NUMA::NUMA) diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp index d27dd58d132..e531fa739e6 100644 --- a/hip/test/base/hip_executor.hip.cpp +++ b/hip/test/base/hip_executor.hip.cpp @@ -109,18 +109,18 @@ class HipExecutor : public ::testing::Test { ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); #ifdef GKO_TEST_NONDEFAULT_STREAM hip = gko::HipExecutor::create( - 0, omp, false, gko::default_hip_alloc_mode, stream.get()); - hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1, - omp, false, gko::default_hip_alloc_mode, - other_stream.get()); + 0, omp, std::make_shared(), stream.get()); + hip2 = gko::HipExecutor::create( + gko::HipExecutor::get_num_devices() - 1, omp, + std::make_shared(), other_stream.get()); hip3 = gko::HipExecutor::create( - 0, omp, false, gko::allocation_mode::unified_global, stream.get()); + 0, omp, std::make_shared(), stream.get()); #else hip = gko::HipExecutor::create(0, omp); hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1, omp); - hip3 = gko::HipExecutor::create(0, omp, false, - gko::allocation_mode::unified_global); + hip3 = gko::HipExecutor::create(0, omp, + std::make_shared()); #endif } diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp index 9337da14139..bf7073cf9a1 100644 --- a/hip/test/utils.hip.hpp +++ b/hip/test/utils.hip.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "hip/base/device.hpp" @@ -60,8 +61,9 @@ class HipTestFixture : public ::testing::Test { HipTestFixture() : ref(gko::ReferenceExecutor::create()), #ifdef GKO_TEST_NONDEFAULT_STREAM + stream(0), exec(gko::HipExecutor::create( - 0, ref, false, gko::default_hip_alloc_mode, stream.get())) + 0, ref, std::make_shared(), stream.get())) #else exec(gko::HipExecutor::create(0, ref)) #endif diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 965cd562bff..4545b216f86 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -47,7 +47,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
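A minimal usage sketch of the RAII stream wrapper implemented above, combined with the allocator-based HipExecutor::create overload this patch introduces (gko::HipAllocator is assumed here, matching what the updated tests pass in; any HipAllocatorBase implementation would work the same way):

    #include <ginkgo/ginkgo.hpp>

    void run_on_custom_stream()
    {
        gko::hip_stream stream{0};  // hipStreamCreate on device 0
        auto ref = gko::ReferenceExecutor::create();
        // the executor only stores the raw handle, so `stream` must outlive `hip`
        auto hip = gko::HipExecutor::create(
            0, ref, std::make_shared<gko::HipAllocator>(), stream.get());
        // ... Ginkgo operations on `hip` are submitted to `stream` ...
    }   // `hip` is released first, then the wrapper destroys the HIP stream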
#include +#include #include +#include #include #include #include @@ -121,33 +123,6 @@ constexpr allocation_mode default_hip_alloc_mode = } // namespace gko -// after intel/llvm September'22 release, which uses major version 6, they -// introduce another inline namespace _V1. -#if GINKGO_DPCPP_MAJOR_VERSION >= 6 -namespace sycl { -inline namespace _V1 { - - -class queue; -class event; - - -} // namespace _V1 -} // namespace sycl -#else // GINKGO_DPCPP_MAJOR_VERSION < 6 -inline namespace cl { -namespace sycl { - - -class queue; -class event; - - -} // namespace sycl -} // namespace cl -#endif - - /** * The enum class is for the dpcpp queue property. It's legal to use a binary * or(|) operation to combine several properties. @@ -172,29 +147,6 @@ GKO_ATTRIBUTES GKO_INLINE dpcpp_queue_property operator|(dpcpp_queue_property a, } -struct cublasContext; - -struct cusparseContext; - -struct CUstream_st; - -struct CUevent_st; - -struct hipblasContext; - -struct hipsparseContext; - -#if GINKGO_HIP_PLATFORM_HCC -struct ihipStream_t; -struct ihipEvent_t; -#define GKO_HIP_STREAM_STRUCT ihipStream_t -#define GKO_HIP_EVENT_STRUCT ihipEvent_t -#else -#define GKO_HIP_STREAM_STRUCT CUstream_st -#define GKO_HIP_EVENT_STRUCT CUevent_st -#endif - - namespace gko { @@ -1355,26 +1307,14 @@ class EnableDeviceReset { * * @param device_reset whether to allow a device reset or not */ - void set_device_reset(bool device_reset) { device_reset_ = device_reset; } + void set_device_reset(bool device_reset) {} /** * Returns the current status of the device reset boolean for this executor. * * @return the current status of the device reset boolean for this executor. */ - bool get_device_reset() { return device_reset_; } - -protected: - /** - * Instantiate an EnableDeviceReset class - * - * @param device_reset the starting device_reset status. Defaults to false. - */ - EnableDeviceReset(bool device_reset = false) : device_reset_{device_reset} - {} - -private: - bool device_reset_{}; + bool get_device_reset() { return false; } }; @@ -1411,9 +1351,11 @@ class OmpExecutor : public detail::ExecutorBase, /** * Creates a new OmpExecutor. 
*/ - static std::shared_ptr create() + static std::shared_ptr create( + std::shared_ptr alloc = + std::make_shared()) { - return std::shared_ptr(new OmpExecutor()); + return std::shared_ptr(new OmpExecutor(std::move(alloc))); } std::shared_ptr get_master() noexcept override; @@ -1435,7 +1377,8 @@ class OmpExecutor : public detail::ExecutorBase, scoped_device_id_guard get_scoped_device_id_guard() const override; protected: - OmpExecutor() + OmpExecutor(std::shared_ptr alloc) + : alloc_{std::move(alloc)} { this->OmpExecutor::populate_exec_info(machine_topology::get_instance()); } @@ -1457,6 +1400,8 @@ class OmpExecutor : public detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); bool verify_memory_to(const DpcppExecutor* dest_exec) const override; + + std::shared_ptr alloc_; }; @@ -1476,9 +1421,12 @@ using DefaultExecutor = OmpExecutor; */ class ReferenceExecutor : public OmpExecutor { public: - static std::shared_ptr create() + static std::shared_ptr create( + std::shared_ptr alloc = + std::make_shared()) { - return std::shared_ptr(new ReferenceExecutor()); + return std::shared_ptr( + new ReferenceExecutor(std::move(alloc))); } scoped_device_id_guard get_scoped_device_id_guard() const override @@ -1495,7 +1443,8 @@ class ReferenceExecutor : public OmpExecutor { } protected: - ReferenceExecutor() + ReferenceExecutor(std::shared_ptr alloc) + : OmpExecutor{std::move(alloc)} { this->ReferenceExecutor::populate_exec_info( machine_topology::get_instance()); @@ -1550,15 +1499,32 @@ class CudaExecutor : public detail::ExecutorBase, * @param device_id the CUDA device id of this device * @param master an executor on the host that is used to invoke the device * kernels - * @param device_reset whether to reset the device after the object exits - * the scope. + * @param device_reset this option no longer has any effect. * @param alloc_mode the allocation mode that the executor should operate * on. See @allocation_mode for more details + * @param stream the stream to execute operations on. + */ + [[deprecated( + "device_reset is deprecated entirely, call cudaDeviceReset directly. " + "alloc_mode was replaced by the Allocator type " + "hierarchy.")]] static std::shared_ptr + create(int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode = default_cuda_alloc_mode, + CUstream_st* stream = nullptr); + + /** + * Creates a new CudaExecutor with a custom allocator and device stream. + * + * @param device_id the CUDA device id of this device + * @param master an executor on the host that is used to invoke the device + * kernels. + * @param alloc the allocator to use for device memory allocations. + * @param stream the stream to execute operations on. 
*/ static std::shared_ptr create( int device_id, std::shared_ptr master, - bool device_reset = false, - allocation_mode alloc_mode = default_cuda_alloc_mode, + std::shared_ptr alloc = + std::make_shared(), CUstream_st* stream = nullptr); std::shared_ptr get_master() noexcept override; @@ -1679,26 +1645,15 @@ class CudaExecutor : public detail::ExecutorBase, void init_handles(); CudaExecutor(int device_id, std::shared_ptr master, - bool device_reset = false, - allocation_mode alloc_mode = default_cuda_alloc_mode, - CUstream_st* stream = nullptr) - : EnableDeviceReset{device_reset}, - master_(master), - alloc_mode_{alloc_mode}, - stream_{stream} + std::shared_ptr alloc, CUstream_st* stream) + : alloc_{std::move(alloc)}, master_(master), stream_{stream} { this->get_exec_info().device_id = device_id; this->get_exec_info().num_computing_units = 0; this->get_exec_info().num_pu_per_cu = 0; this->CudaExecutor::populate_exec_info( machine_topology::get_instance()); - - // it only gets attribute from device, so it should not be affected by - // DeviceReset. this->set_gpu_property(); - // increase the number of executor before any operations may be affected - // by DeviceReset. - increase_num_execs(this->get_exec_info().device_id); this->init_handles(); } @@ -1718,12 +1673,6 @@ class CudaExecutor : public detail::ExecutorBase, bool verify_memory_to(const CudaExecutor* dest_exec) const override; - static void increase_num_execs(unsigned device_id); - - static void decrease_num_execs(unsigned device_id); - - static unsigned get_num_execs(unsigned device_id); - void populate_exec_info(const machine_topology* mach_topo) override; private: @@ -1733,45 +1682,8 @@ class CudaExecutor : public detail::ExecutorBase, using handle_manager = std::unique_ptr>; handle_manager cublas_handle_; handle_manager cusparse_handle_; + std::shared_ptr alloc_; CUstream_st* stream_; - - allocation_mode alloc_mode_; -}; - - -/** - * An RAII wrapper for a custom CUDA stream. - * The stream will be created on construction and destroyed when the lifetime of - * the wrapper ends. - */ -class cuda_stream { -public: - /** Creates a new custom CUDA stream. */ - cuda_stream(int device_id = 0); - - /** Destroys the custom CUDA stream, if it wasn't moved-from already. */ - ~cuda_stream(); - - cuda_stream(const cuda_stream&) = delete; - - /** Move-constructs from an existing stream, which will be emptied. */ - cuda_stream(cuda_stream&&); - - cuda_stream& operator=(const cuda_stream&) = delete; - - /** Move-assigns from an existing stream, which will be emptied. */ - cuda_stream& operator=(cuda_stream&&) = delete; - - /** - * Returns the native CUDA stream handle. - * In a moved-from cuda_stream, this will return nullptr. - */ - CUstream_st* get() const; - -private: - CUstream_st* stream_; - - int device_id_; }; @@ -1805,10 +1717,15 @@ class HipExecutor : public detail::ExecutorBase, * @param alloc_mode the allocation mode that the executor should operate * on. 
See @allocation_mode for more details */ + [[deprecated("")]] static std::shared_ptr create( + int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode = default_hip_alloc_mode, + GKO_HIP_STREAM_STRUCT* stream = nullptr); + static std::shared_ptr create( int device_id, std::shared_ptr master, - bool device_reset = false, - allocation_mode alloc_mode = default_hip_alloc_mode, + std::shared_ptr alloc = + std::make_shared(), GKO_HIP_STREAM_STRUCT* stream = nullptr); std::shared_ptr get_master() noexcept override; @@ -1923,25 +1840,15 @@ class HipExecutor : public detail::ExecutorBase, void init_handles(); HipExecutor(int device_id, std::shared_ptr master, - bool device_reset = false, - allocation_mode alloc_mode = default_hip_alloc_mode, - GKO_HIP_STREAM_STRUCT* stream = nullptr) - : EnableDeviceReset{device_reset}, - master_(master), - alloc_mode_(alloc_mode), - stream_{stream} + std::shared_ptr alloc, + GKO_HIP_STREAM_STRUCT* stream) + : master_{std::move(master)}, alloc_{std::move(alloc)}, stream_{stream} { this->get_exec_info().device_id = device_id; this->get_exec_info().num_computing_units = 0; this->get_exec_info().num_pu_per_cu = 0; this->HipExecutor::populate_exec_info(machine_topology::get_instance()); - - // it only gets attribute from device, so it should not be affected by - // DeviceReset. this->set_gpu_property(); - // increase the number of executor before any operations may be affected - // by DeviceReset. - increase_num_execs(this->get_exec_info().device_id); this->init_handles(); } @@ -1961,12 +1868,6 @@ class HipExecutor : public detail::ExecutorBase, bool verify_memory_to(const HipExecutor* dest_exec) const override; - static void increase_num_execs(int device_id); - - static void decrease_num_execs(int device_id); - - static int get_num_execs(int device_id); - void populate_exec_info(const machine_topology* mach_topo) override; private: @@ -1976,48 +1877,11 @@ class HipExecutor : public detail::ExecutorBase, using handle_manager = std::unique_ptr>; handle_manager hipblas_handle_; handle_manager hipsparse_handle_; - - allocation_mode alloc_mode_; + std::shared_ptr alloc_; GKO_HIP_STREAM_STRUCT* stream_; }; -/** - * An RAII wrapper for a custom HIP stream. - * The stream will be created on construction and destroyed when the lifetime of - * the wrapper ends. - */ -class hip_stream { -public: - /** Creates a new custom HIP stream. */ - hip_stream(int device_id = 0); - - /** Destroys the custom HIP stream, if it wasn't moved-from already. */ - ~hip_stream(); - - hip_stream(const hip_stream&) = delete; - - /** Move-constructs from an existing stream, which will be emptied. */ - hip_stream(hip_stream&&); - - hip_stream& operator=(const hip_stream&) = delete; - - /** Move-assigns from an existing stream, which will be emptied. */ - hip_stream& operator=(hip_stream&&) = delete; - - /** - * Returns the native HIP stream handle. - * In a moved-from hip_stream, this will return nullptr. - */ - GKO_HIP_STREAM_STRUCT* get() const; - -private: - GKO_HIP_STREAM_STRUCT* stream_; - - int device_id_; -}; - - namespace kernels { namespace hip { using DefaultExecutor = HipExecutor; @@ -2050,6 +1914,28 @@ class DpcppExecutor : public detail::ExecutorBase, std::string device_type = "all", dpcpp_queue_property property = dpcpp_queue_property::in_order); + /** + * Creates a new DpcppExecutor from an existing SYCL queue. 
+ * + * @param queue the DPCPP device id of this device + * @param master an executor on the host that is used to invoke the device + * kernels + */ + static std::shared_ptr create( + sycl::queue* queue, std::shared_ptr master); + + /** + * Creates a new DpcppExecutor from an existing SYCL queue. + * + * @param queue the DPCPP device id of this device + * @param master an executor on the host that is used to invoke the device + * kernels + * @param alloc the allocator used for memory allocation + */ + static std::shared_ptr create( + sycl::queue* queue, std::shared_ptr master, + std::shared_ptr alloc); + std::shared_ptr get_master() noexcept override; std::shared_ptr get_master() const noexcept override; diff --git a/include/ginkgo/core/base/fwd_defs.hpp b/include/ginkgo/core/base/fwd_defs.hpp new file mode 100644 index 00000000000..5f0cbd9d960 --- /dev/null +++ b/include/ginkgo/core/base/fwd_defs.hpp @@ -0,0 +1,90 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_ +#define GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_ + + +#include + + +struct cublasContext; + +struct cusparseContext; + +struct CUstream_st; + +struct CUevent_st; + +struct hipblasContext; + +struct hipsparseContext; + +#if GINKGO_HIP_PLATFORM_HCC +struct ihipStream_t; +struct ihipEvent_t; +#define GKO_HIP_STREAM_STRUCT ihipStream_t +#define GKO_HIP_EVENT_STRUCT ihipEvent_t +#else +#define GKO_HIP_STREAM_STRUCT CUstream_st +#define GKO_HIP_EVENT_STRUCT CUevent_st +#endif + + +// after intel/llvm September'22 release, which uses major version 6, they +// introduce another inline namespace _V1. 
+#if GINKGO_DPCPP_MAJOR_VERSION >= 6 +namespace sycl { +inline namespace _V1 { + + +class queue; +class event; + + +} // namespace _V1 +} // namespace sycl +#else // GINKGO_DPCPP_MAJOR_VERSION < 6 +inline namespace cl { +namespace sycl { + + +class queue; +class event; + + +} // namespace sycl +} // namespace cl +#endif + + +#endif // GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_ diff --git a/include/ginkgo/core/base/memory.hpp b/include/ginkgo/core/base/memory.hpp new file mode 100644 index 00000000000..ec25920dcea --- /dev/null +++ b/include/ginkgo/core/base/memory.hpp @@ -0,0 +1,211 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_MEMORY_HPP_ +#define GKO_PUBLIC_CORE_BASE_MEMORY_HPP_ + + +#include +#include + + +namespace gko { + + +/** + * Provides generic allocation and deallocation functionality to be used by an + * Executor. + */ +class Allocator { +public: + virtual ~Allocator() = default; + + virtual void* allocate(size_type num_bytes) const = 0; + + virtual void deallocate(void* ptr) const = 0; +}; + + +/** + * Implement this interface to provide an allocator for OmpExecutor or + * ReferenceExecutor. + */ +class CpuAllocatorBase : public Allocator {}; + + +/** + * Implement this interface to provide an allocator for CudaExecutor. + */ +class CudaAllocatorBase : public Allocator {}; + + +/** + * Implement this interface to provide an allocator for HipExecutor. + */ +class HipAllocatorBase : public Allocator {}; + + +/** + * Implement this interface to provide an allocator for DpcppExecutor. + */ +class DpcppAllocatorBase : public Allocator { +public: + DpcppAllocatorBase(sycl::queue* queue); + +protected: + virtual void* allocate_impl(sycl::queue* queue, + size_type num_bytes) const = 0; + + virtual void deallocate_impl(sycl::queue* queue, void* ptr) const = 0; + +private: + sycl::queue* queue_; +}; + + +/** + * Allocator using new/delete. 
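The interface above is all the executors rely on: a custom allocator derives from the base class of the backend it targets and implements allocate/deallocate. A minimal sketch with a hypothetical LoggingCpuAllocator (not part of the patch), using the const-qualified signatures as declared at this point in the series (a later commit in the series drops the const again):

    #include <iostream>

    #include <ginkgo/ginkgo.hpp>

    class LoggingCpuAllocator : public gko::CpuAllocatorBase {
    public:
        void* allocate(gko::size_type num_bytes) const override
        {
            std::clog << "allocating " << num_bytes << " bytes\n";
            return ::operator new(num_bytes);
        }

        void deallocate(void* ptr) const override
        {
            std::clog << "deallocating\n";
            ::operator delete(ptr);
        }
    };

Such an allocator can then be handed to the allocator-aware OmpExecutor::create and ReferenceExecutor::create overloads added earlier in this patch, e.g. gko::OmpExecutor::create(std::make_shared<LoggingCpuAllocator>()).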
+ */ +class CpuAllocator : public CpuAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; +}; + + +/** + * Allocator using cudaMalloc. + */ +class CudaAllocator : public CudaAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; +}; + + +/* + * Allocator using cudaMallocAsync. + */ +class CudaAsyncAllocator : public CudaAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; + + CudaAsyncAllocator(CUstream_st* stream); + +private: + CUstream_st* stream_; +}; + + +/* + * Allocator using cudaMallocManaged + */ +class CudaUnifiedAllocator : public CudaAllocatorBase, public CpuAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; + + CudaUnifiedAllocator(int device_id); + + CudaUnifiedAllocator(int device_id, unsigned int flags); + +private: + int device_id_; + unsigned int flags_; +}; + + +/* + * Allocator using cudaMallocHost. + */ +class CudaHostAllocator : public CudaAllocatorBase, public CpuAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; + + CudaHostAllocator(int device_id); + +private: + int device_id_; +}; + + +/* + * Allocator using hipMalloc. + */ +class HipAllocator : public HipAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; +}; + + +/* + * Allocator using sycl::malloc_device. + */ +class DpcppAllocator : public DpcppAllocatorBase { +public: + using DpcppAllocatorBase::DpcppAllocatorBase; + +protected: + void* allocate_impl(sycl::queue* queue, size_type num_bytes) const override; + + void deallocate_impl(sycl::queue* queue, void* ptr) const override; +}; + + +/* + * Allocator using sycl::malloc_shared. + */ +class DpcppUnifiedAllocator : public DpcppAllocatorBase, + public CpuAllocatorBase { +public: + using DpcppAllocatorBase::DpcppAllocatorBase; + +protected: + void* allocate_impl(sycl::queue* queue, size_type num_bytes) const override; + + void deallocate_impl(sycl::queue* queue, void* ptr) const override; +}; + + +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_MEMORY_HPP_ diff --git a/include/ginkgo/core/base/stream.hpp b/include/ginkgo/core/base/stream.hpp new file mode 100644 index 00000000000..4bb4aeecf9e --- /dev/null +++ b/include/ginkgo/core/base/stream.hpp @@ -0,0 +1,124 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
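Note that CudaUnifiedAllocator and CudaHostAllocator above derive from CpuAllocatorBase as well, so managed or pinned memory can also back a host executor. A short sketch of that combination (illustrative only; device 0 is an assumption):

    #include <ginkgo/ginkgo.hpp>

    // pinned host memory behind the OpenMP executor
    std::shared_ptr<gko::OmpExecutor> make_pinned_host_executor()
    {
        return gko::OmpExecutor::create(
            std::make_shared<gko::CudaHostAllocator>(0));
    }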
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_STREAM_HPP_ +#define GKO_PUBLIC_CORE_BASE_STREAM_HPP_ + + +#include + + +namespace gko { + + +/** + * An RAII wrapper for a custom CUDA stream. + * The stream will be created on construction and destroyed when the lifetime of + * the wrapper ends. + */ +class cuda_stream { +public: + /** Creates an empty stream wrapper, representing the default stream. */ + cuda_stream(); + + /** Creates a new custom CUDA stream. */ + cuda_stream(int device_id); + + /** Destroys the custom CUDA stream, if it isn't empty. */ + ~cuda_stream(); + + cuda_stream(const cuda_stream&) = delete; + + /** Move-constructs from an existing stream, which will be emptied. */ + cuda_stream(cuda_stream&&); + + cuda_stream& operator=(const cuda_stream&) = delete; + + /** Move-assigns from an existing stream, which will be emptied. */ + cuda_stream& operator=(cuda_stream&&) = delete; + + /** + * Returns the native CUDA stream handle. + * In an empty cuda_stream, this will return nullptr. + */ + CUstream_st* get() const; + +private: + CUstream_st* stream_; + + int device_id_; +}; + + +/** + * An RAII wrapper for a custom HIP stream. + * The stream will be created on construction and destroyed when the lifetime of + * the wrapper ends. + */ +class hip_stream { +public: + /** Creates an empty stream wrapper, representing the default stream. */ + hip_stream(); + + /** Creates a new custom HIP stream. */ + hip_stream(int device_id); + + /** Destroys the custom HIP stream, if it isn't empty. */ + ~hip_stream(); + + hip_stream(const hip_stream&) = delete; + + /** Move-constructs from an existing stream, which will be emptied. */ + hip_stream(hip_stream&&); + + hip_stream& operator=(const hip_stream&) = delete; + + /** Move-assigns from an existing stream, which will be emptied. */ + hip_stream& operator=(hip_stream&&) = delete; + + /** + * Returns the native HIP stream handle. + * In an empty hip_stream, this will return nullptr. + */ + GKO_HIP_STREAM_STRUCT* get() const; + +private: + GKO_HIP_STREAM_STRUCT* stream_; + + int device_id_; +}; + + +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_EXECUTOR_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 93663b02290..d73bf669700 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -48,6 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -65,6 +67,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include #include #include diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 25482cf18c8..33e6258fbbd 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -44,6 +44,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #ifdef GKO_COMPILING_CUDA #include "cuda/base/device.hpp" @@ -106,8 +109,8 @@ inline void init_executor(std::shared_ptr ref, if (gko::CudaExecutor::get_num_devices() == 0) { throw std::runtime_error{"No suitable CUDA devices"}; } - exec = gko::CudaExecutor::create(0, ref, false, - gko::default_cuda_alloc_mode, stream); + exec = gko::CudaExecutor::create( + 0, ref, std::make_shared(stream), stream); } } @@ -119,8 +122,8 @@ inline void init_executor(std::shared_ptr ref, if (gko::HipExecutor::get_num_devices() == 0) { throw std::runtime_error{"No suitable HIP devices"}; } - exec = gko::HipExecutor::create(0, ref, false, gko::default_hip_alloc_mode, - stream); + exec = gko::HipExecutor::create( + 0, ref, std::make_shared(), stream); } @@ -146,7 +149,13 @@ class CommonTestFixture : public ::testing::Test { #endif using index_type = int; - CommonTestFixture() : ref{gko::ReferenceExecutor::create()} + CommonTestFixture() + : +#if defined(GKO_TEST_NONDEFAULT_STREAM) && \ + (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)) + stream{0}, +#endif + ref{gko::ReferenceExecutor::create()} { #if defined(GKO_TEST_NONDEFAULT_STREAM) && \ (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)) diff --git a/test/utils/mpi/executor.hpp b/test/utils/mpi/executor.hpp index 59c3f1e3f3b..d8c94e01804 100644 --- a/test/utils/mpi/executor.hpp +++ b/test/utils/mpi/executor.hpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
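The init_executor change above is the general migration pattern for this release: the (device_reset, allocation_mode) overloads still exist but are deprecated, and an allocator object replaces allocation_mode. A hedged before/after sketch for user code (CudaAsyncAllocator is one possible choice when a non-default stream is used, mirroring the test utility above):

    #include <ginkgo/ginkgo.hpp>

    std::shared_ptr<gko::CudaExecutor> make_cuda(
        std::shared_ptr<gko::Executor> ref, CUstream_st* stream)
    {
        // before: gko::CudaExecutor::create(0, ref, false,
        //                                   gko::default_cuda_alloc_mode, stream);
        // after: pick the allocator explicitly
        return gko::CudaExecutor::create(
            0, ref, std::make_shared<gko::CudaAsyncAllocator>(stream), stream);
    }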
#include +#include inline void init_executor(std::shared_ptr, @@ -71,7 +72,7 @@ inline void init_executor(std::shared_ptr ref, exec = gko::CudaExecutor::create( gko::experimental::mpi::map_rank_to_device_id( MPI_COMM_WORLD, gko::CudaExecutor::get_num_devices()), - ref, false, gko::default_cuda_alloc_mode, stream); + ref, std::make_shared(), stream); } } @@ -86,7 +87,7 @@ inline void init_executor(std::shared_ptr ref, exec = gko::HipExecutor::create( gko::experimental::mpi::map_rank_to_device_id( MPI_COMM_WORLD, gko::HipExecutor::get_num_devices()), - ref, false, gko::default_hip_alloc_mode, stream); + ref, std::make_shared(), stream); } From 5dfebba3cbe3495ae150bc5b902cb25851a6796d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 30 Mar 2023 12:23:58 +0200 Subject: [PATCH 051/583] reset to default CUDA allocator --- test/utils/executor.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 33e6258fbbd..c588ac74260 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -110,7 +110,7 @@ inline void init_executor(std::shared_ptr ref, throw std::runtime_error{"No suitable CUDA devices"}; } exec = gko::CudaExecutor::create( - 0, ref, std::make_shared(stream), stream); + 0, ref, std::make_shared(stream), stream); } } From cd7cfc224321bd1013161b92226ce905e4b4d16d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 30 Mar 2023 16:18:42 +0200 Subject: [PATCH 052/583] fix some compilation issues --- core/device_hooks/cuda_hooks.cpp | 12 ++++++++++- core/device_hooks/dpcpp_hooks.cpp | 15 +++++++++++-- core/device_hooks/hip_hooks.cpp | 25 +++++++++------------- cuda/base/memory.cpp | 28 ++++++++++++++++++++++++ dpcpp/CMakeLists.txt | 1 + dpcpp/base/executor.dp.cpp | 33 ----------------------------- dpcpp/base/memory.dp.cpp | 25 ++++++++++++++++------ include/ginkgo/core/base/memory.hpp | 4 ++++ 8 files changed, 86 insertions(+), 57 deletions(-) diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index cdecf735a9d..f8489908cc9 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -96,9 +96,19 @@ void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda); std::shared_ptr CudaExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, CUstream_st* stream) +{ + return std::shared_ptr( + new CudaExecutor(device_id, std::move(master), + std::make_shared(), stream)); +} + + +std::shared_ptr CudaExecutor::create( + int device_id, std::shared_ptr master, + std::shared_ptr alloc, CUstream_st* stream) { return std::shared_ptr(new CudaExecutor( - device_id, std::move(master), device_reset, alloc_mode, stream)); + device_id, std::move(master), std::move(alloc), stream)); } diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index 0ee3e6f289f..1981c712872 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -53,7 +53,18 @@ version version_info::get_dpcpp_version() noexcept } -void* DpcppAllocator::allocate_impl(sycl::queue* queue, size_type size) const +DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue*) GKO_NOT_COMPILED(dpcpp); + + +void* DpcppAllocatorBase::allocate(size_type num_bytes) const + GKO_NOT_COMPILED(dpcpp); + + +void DpcppAllocatorBase::deallocate(void* ptr) const GKO_NOT_COMPILED(dpcpp); + + +void* DpcppAllocator::allocate_impl(sycl::queue* queue, + size_type num_bytes) const GKO_NOT_COMPILED(dpcpp); @@ -62,7 
+73,7 @@ void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue, - size_type size) const + size_type num_bytes) const GKO_NOT_COMPILED(dpcpp); diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index 739dac39f08..54486cc4e74 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -36,10 +36,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include #include +#include "ginkgo/core/base/memory.hpp" namespace gko { @@ -53,29 +55,22 @@ version version_info::get_hip_version() noexcept } -void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); +void* HipAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(hip); -void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); +void HipAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(hip); -HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream) +std::shared_ptr HipExecutor::create( + int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream) GKO_NOT_COMPILED(hip); -void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); - - -void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); - - std::shared_ptr HipExecutor::create( - int device_id, std::shared_ptr master, bool device_reset, - allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream) -{ - return std::shared_ptr(new HipExecutor( - device_id, std::move(master), device_reset, alloc_mode, stream)); -} + int device_id, std::shared_ptr master, + std::shared_ptr alloc, GKO_HIP_STREAM_STRUCT* stream) + GKO_NOT_COMPILED(hip); void HipExecutor::populate_exec_info(const machine_topology* mach_topo) diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp index 11dee81ad42..c1b0a5d517f 100644 --- a/cuda/base/memory.cpp +++ b/cuda/base/memory.cpp @@ -97,6 +97,9 @@ void CudaAllocator::deallocate(void* ptr) const } +#if CUDA_VERSION >= 11020 + + CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {} @@ -108,12 +111,37 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const return ptr; } + void CudaAsyncAllocator::deallocate(void* ptr) const { GKO_EXIT_ON_CUDA_ERROR(cudaFreeAsync(ptr, stream_)); } +#else // Fall back to regular allocation + + +CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{} {} + + +void* CudaAsyncAllocator::allocate(size_type num_bytes) const +{ + void* ptr{}; + GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes), + num_bytes); + return ptr; +} + + +void CudaAsyncAllocator::deallocate(void* ptr) const +{ + GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr)); +} + + +#endif + + CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id) : CudaUnifiedAllocator{device_id, cudaMemAttachGlobal} {} diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 31b5e0543ba..55763ca5525 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -10,6 +10,7 @@ target_sources(ginkgo_dpcpp base/executor.dp.cpp base/helper.dp.cpp base/index_set_kernels.dp.cpp + base/memory.dp.cpp base/scoped_device_id.dp.cpp base/timer.dp.cpp base/version.dp.cpp diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index d668331a43b..c2015c8664c 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -51,39 +51,6 @@ namespace gko { namespace detail { 
-DpcppAllocator::DpcppAllocator(sycl::queue* queue) : queue_{queue} {} - - -void* DpcppAllocator::allocate(size_type size) -{ - return sycl::malloc_device(size, *queue_); -} - - -void DpcppAllocator::deallocate(void* ptr) -{ - queue_->wait_and_throw(); - sycl::free(ptr, queue_->get_context()); -} - - -DpcppUnifiedAllocator::DpcppUnifiedAllocator(sycl::queue* queue) : queue_{queue} -{} - - -void* DpcppUnifiedAllocator::allocate(size_type size) -{ - return sycl::malloc_shared(size, *queue_); -} - - -void DpcppUnifiedAllocator::deallocate(void* ptr) -{ - queue_->wait_and_throw(); - sycl::free(ptr, queue_->get_context()); -} - - const std::vector get_devices(std::string device_type) { std::map device_type_map{ diff --git a/dpcpp/base/memory.dp.cpp b/dpcpp/base/memory.dp.cpp index b1ccd007dea..2582fa331a0 100644 --- a/dpcpp/base/memory.dp.cpp +++ b/dpcpp/base/memory.dp.cpp @@ -42,10 +42,22 @@ namespace gko { DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue* queue) : queue_{queue} {} +void* DpcppAllocatorBase::allocate(size_type num_bytes) const +{ + return this->allocate_impl(queue_, num_bytes); +} + + +void DpcppAllocatorBase::deallocate(void* ptr) const +{ + this->deallocate_impl(queue_, ptr); +} + + void* DpcppAllocator::allocate_impl(sycl::queue* queue, size_type num_bytes) const { - return sycl::malloc_device(size, *queue); + return sycl::malloc_device(num_bytes, *queue); } @@ -56,16 +68,17 @@ void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const } -void* DpcppUnifiedAllocator::allocate(size_type num_bytes) +void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue, + size_type num_bytes) { - return sycl::malloc_shared(size, *queue_); + return sycl::malloc_shared(num_bytes, *queue); } -void DpcppUnifiedAllocator::deallocate(void* ptr) +void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) { - queue_->wait_and_throw(); - sycl::free(ptr, queue_->get_context()); + queue->wait_and_throw(); + sycl::free(ptr, queue->get_context()); } diff --git a/include/ginkgo/core/base/memory.hpp b/include/ginkgo/core/base/memory.hpp index ec25920dcea..872a25a9a33 100644 --- a/include/ginkgo/core/base/memory.hpp +++ b/include/ginkgo/core/base/memory.hpp @@ -81,6 +81,10 @@ class DpcppAllocatorBase : public Allocator { public: DpcppAllocatorBase(sycl::queue* queue); + void* allocate(size_type num_bytes) const final; + + void deallocate(void* ptr) const final; + protected: virtual void* allocate_impl(sycl::queue* queue, size_type num_bytes) const = 0; From 223b06b477de4570a27b94d175d0f30219faf1c5 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 31 May 2023 14:08:44 +0200 Subject: [PATCH 053/583] formatting --- core/base/memory.cpp | 2 +- core/device_hooks/hip_hooks.cpp | 2 +- core/test/base/executor.cpp | 2 +- cuda/base/device.cpp | 4 +--- cuda/base/memory.cpp | 2 +- cuda/base/nvtx.cpp | 4 +++- cuda/base/stream.cpp | 4 +++- cuda/test/base/cuda_executor.cu | 1 - hip/base/device.hip.cpp | 4 ++-- hip/base/roctx.hip.cpp | 4 +++- include/ginkgo/core/base/stream.hpp | 2 +- 11 files changed, 17 insertions(+), 14 deletions(-) diff --git a/core/base/memory.cpp b/core/base/memory.cpp index 88d97bcc765..4e9f0b7e24a 100644 --- a/core/base/memory.cpp +++ b/core/base/memory.cpp @@ -56,4 +56,4 @@ void CpuAllocator::deallocate(void* ptr) const } -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index 54486cc4e74..4dbe6409c01 100644 --- a/core/device_hooks/hip_hooks.cpp +++ 
b/core/device_hooks/hip_hooks.cpp @@ -36,12 +36,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include #include #include -#include "ginkgo/core/base/memory.hpp" namespace gko { diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 71064cf01d2..94e7bc02d79 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include "ginkgo/core/base/memory.hpp" #if defined(__unix__) || defined(__APPLE__) @@ -47,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include namespace { diff --git a/cuda/base/device.cpp b/cuda/base/device.cpp index 31ab5bcde63..2db0876ca95 100644 --- a/cuda/base/device.cpp +++ b/cuda/base/device.cpp @@ -30,15 +30,13 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "cuda/base/device.hpp" - - #include #include +#include "cuda/base/device.hpp" #include "cuda/base/scoped_device_id.hpp" diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp index c1b0a5d517f..afc1f9f62fa 100644 --- a/cuda/base/memory.cpp +++ b/cuda/base/memory.cpp @@ -193,4 +193,4 @@ void CudaHostAllocator::deallocate(void* ptr) const } -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/cuda/base/nvtx.cpp b/cuda/base/nvtx.cpp index e313c110ea2..3cbc59299b0 100644 --- a/cuda/base/nvtx.cpp +++ b/cuda/base/nvtx.cpp @@ -30,10 +30,12 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include + + #include -#include #ifdef GKO_LEGACY_NVTX #include #else diff --git a/cuda/base/stream.cpp b/cuda/base/stream.cpp index 8c6aa92c28b..0bbc9b1cc83 100644 --- a/cuda/base/stream.cpp +++ b/cuda/base/stream.cpp @@ -30,11 +30,13 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include + + #include #include -#include #include "cuda/base/scoped_device_id.hpp" diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu index afb23c06186..c81799e0dae 100644 --- a/cuda/test/base/cuda_executor.cu +++ b/cuda/test/base/cuda_executor.cu @@ -44,7 +44,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include - #include "common/cuda_hip/base/executor.hpp.inc" #include "cuda/base/scoped_device_id.hpp" #include "cuda/test/utils.hpp" diff --git a/hip/base/device.hip.cpp b/hip/base/device.hip.cpp index b5ec1bec6d6..9a01d6aacee 100644 --- a/hip/base/device.hip.cpp +++ b/hip/base/device.hip.cpp @@ -30,15 +30,15 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#include +#include #include #include -#include #include +#include #include "hip/base/scoped_device_id.hip.hpp" diff --git a/hip/base/roctx.hip.cpp b/hip/base/roctx.hip.cpp index 9f309b93362..a01bc11dc47 100644 --- a/hip/base/roctx.hip.cpp +++ b/hip/base/roctx.hip.cpp @@ -30,10 +30,12 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include + + #include -#include #if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX #include #endif diff --git a/include/ginkgo/core/base/stream.hpp b/include/ginkgo/core/base/stream.hpp index 4bb4aeecf9e..8ee8333e41a 100644 --- a/include/ginkgo/core/base/stream.hpp +++ b/include/ginkgo/core/base/stream.hpp @@ -121,4 +121,4 @@ class hip_stream { } // namespace gko -#endif // GKO_PUBLIC_CORE_BASE_EXECUTOR_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_STREAM_HPP_ From 19c23eca2d0472d5a003bdcbe026315d14d0a761 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 1 Jun 2023 12:00:18 +0200 Subject: [PATCH 054/583] fix compilation --- core/device_hooks/hip_hooks.cpp | 12 ++++++++++-- test/utils/executor.hpp | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index 4dbe6409c01..ba7563f1ef0 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -64,13 +64,21 @@ void HipAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(hip); std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream) - GKO_NOT_COMPILED(hip); +{ + return std::shared_ptr( + new HipExecutor(device_id, std::move(master), + std::make_shared(), stream)); +} std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, std::shared_ptr alloc, GKO_HIP_STREAM_STRUCT* stream) - GKO_NOT_COMPILED(hip); +{ + return std::shared_ptr( + new HipExecutor(device_id, std::move(master), + std::make_shared(), stream)); +} void HipExecutor::populate_exec_info(const machine_topology* mach_topo) diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index c588ac74260..200f4652644 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -110,7 +110,7 @@ inline void init_executor(std::shared_ptr ref, throw std::runtime_error{"No suitable CUDA devices"}; } exec = gko::CudaExecutor::create( - 0, ref, std::make_shared(stream), stream); + 0, ref, std::make_shared(), stream); } } From eb31e6215cbaf00ddc1aed87c1af925b5b50a68b Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 6 Jul 2023 15:45:46 +0200 Subject: [PATCH 055/583] update interface and remove DPC++ allocator --- core/base/memory.cpp | 4 +- core/device_hooks/cuda_hooks.cpp | 32 ++++-- core/device_hooks/dpcpp_hooks.cpp | 28 ----- core/device_hooks/hip_hooks.cpp | 51 ++++++++- core/test/base/executor.cpp | 2 +- cuda/base/executor.cpp | 9 +- cuda/base/memory.cpp | 43 ++++++-- dpcpp/CMakeLists.txt | 1 - dpcpp/base/memory.dp.cpp | 85 --------------- dpcpp/test/base/CMakeLists.txt | 1 - dpcpp/test/base/memory.dp.cpp | 98 ----------------- hip/base/executor.hip.cpp | 9 +- hip/base/memory.hip.cpp | 123 ++++++++++++++++++++- include/ginkgo/core/base/executor.hpp | 33 ++---- include/ginkgo/core/base/memory.hpp | 151 ++++++++++++++++++-------- 15 files changed, 348 insertions(+), 322 deletions(-) delete mode 100644 
dpcpp/base/memory.dp.cpp delete mode 100644 dpcpp/test/base/memory.dp.cpp diff --git a/core/base/memory.cpp b/core/base/memory.cpp index 4e9f0b7e24a..b6c6f8f265c 100644 --- a/core/base/memory.cpp +++ b/core/base/memory.cpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -void* CpuAllocator::allocate(size_type num_bytes) const +void* CpuAllocator::allocate(size_type num_bytes) { auto ptr = ::operator new (num_bytes, std::nothrow_t{}); GKO_ENSURE_ALLOCATED(ptr, "cpu", num_bytes); @@ -50,7 +50,7 @@ void* CpuAllocator::allocate(size_type num_bytes) const } -void CpuAllocator::deallocate(void* ptr) const +void CpuAllocator::deallocate(void* ptr) { ::operator delete (ptr, std::nothrow_t{}); } diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index f8489908cc9..03ab12deb46 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -54,43 +54,55 @@ version version_info::get_cuda_version() noexcept } -void* CudaAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(cuda); +void* CudaAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda); -void CudaAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda); +void CudaAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda); CudaAsyncAllocator::CudaAsyncAllocator(CUstream_st* stream) GKO_NOT_COMPILED(cuda); -void* CudaAsyncAllocator::allocate(size_type num_bytes) const - GKO_NOT_COMPILED(cuda); +void* CudaAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda); + +void CudaAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda); -void CudaAsyncAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda); + +bool CudaAsyncAllocator::check_environment(int device_id, + CUstream_st* stream) const + GKO_NOT_COMPILED(cuda); CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags) GKO_NOT_COMPILED(cuda); -void* CudaUnifiedAllocator::allocate(size_type num_bytes) const +void* CudaUnifiedAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda); -void CudaUnifiedAllocator::deallocate(void* dev_ptr) const +void CudaUnifiedAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda); + + +bool CudaUnifiedAllocator::check_environment(int device_id, + CUstream_st* stream) const GKO_NOT_COMPILED(cuda); CudaHostAllocator::CudaHostAllocator(int device_id) GKO_NOT_COMPILED(cuda); -void* CudaHostAllocator::allocate(size_type num_bytes) const - GKO_NOT_COMPILED(cuda); +void* CudaHostAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda); + +void CudaHostAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda); -void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda); + +bool CudaHostAllocator::check_environment(int device_id, + CUstream_st* stream) const + GKO_NOT_COMPILED(cuda); std::shared_ptr CudaExecutor::create( diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index 1981c712872..532e9c55bbe 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -53,34 +53,6 @@ version version_info::get_dpcpp_version() noexcept } -DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue*) GKO_NOT_COMPILED(dpcpp); - - -void* DpcppAllocatorBase::allocate(size_type num_bytes) const - GKO_NOT_COMPILED(dpcpp); - - -void DpcppAllocatorBase::deallocate(void* ptr) const GKO_NOT_COMPILED(dpcpp); - - -void* DpcppAllocator::allocate_impl(sycl::queue* queue, - size_type num_bytes) const - 
GKO_NOT_COMPILED(dpcpp); - - -void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const - GKO_NOT_COMPILED(dpcpp); - - -void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue, - size_type num_bytes) const - GKO_NOT_COMPILED(dpcpp); - - -void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const - GKO_NOT_COMPILED(dpcpp); - - std::shared_ptr DpcppExecutor::create( int device_id, std::shared_ptr master, std::string device_type, dpcpp_queue_property property) diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index ba7563f1ef0..dec1de15933 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -55,10 +55,54 @@ version version_info::get_hip_version() noexcept } -void* HipAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(hip); +void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); -void HipAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(hip); +void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); + + +HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream) + GKO_NOT_COMPILED(hip); + + +void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); + + +void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); + + +bool HipAsyncAllocator::check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const + GKO_NOT_COMPILED(hip); + + +HipUnifiedAllocator::HipUnifiedAllocator(int device_id, unsigned int flags) + GKO_NOT_COMPILED(hip); + + +void* HipUnifiedAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); + + +void HipUnifiedAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); + + +bool HipUnifiedAllocator::check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const + GKO_NOT_COMPILED(hip); + + +HipHostAllocator::HipHostAllocator(int device_id) GKO_NOT_COMPILED(hip); + + +void* HipHostAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); + + +void HipHostAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); + + +bool HipHostAllocator::check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const + GKO_NOT_COMPILED(hip); std::shared_ptr HipExecutor::create( @@ -76,8 +120,7 @@ std::shared_ptr HipExecutor::create( std::shared_ptr alloc, GKO_HIP_STREAM_STRUCT* stream) { return std::shared_ptr( - new HipExecutor(device_id, std::move(master), - std::make_shared(), stream)); + new HipExecutor(device_id, std::move(master), alloc, stream)); } diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 94e7bc02d79..13cba09e2b6 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -386,7 +386,7 @@ TEST(Executor, CanVerifyMemory) struct MockAllocator : gko::CpuAllocator { - void deallocate(void* ptr) const noexcept override + void deallocate(void* ptr) noexcept override { called_free = true; CpuAllocator::deallocate(ptr); diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index f6e838dd2dd..faf90037a0f 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -80,9 +80,8 @@ std::shared_ptr CudaExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, cudaStream_t stream) { - return std::shared_ptr( - new CudaExecutor(device_id, std::move(master), - allocator_from_mode(device_id, alloc_mode), stream)); + return create(device_id, master, allocator_from_mode(device_id, alloc_mode), + stream); } @@ -90,6 +89,10 @@ std::shared_ptr 
CudaExecutor::create( int device_id, std::shared_ptr master, std::shared_ptr alloc, cudaStream_t stream) { + if (!alloc->check_environment(device_id, stream)) { + throw Error{__FILE__, __LINE__, + "Allocator uses incorrect stream or device ID."}; + } return std::shared_ptr(new CudaExecutor( device_id, std::move(master), std::move(alloc), stream)); } diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp index afc1f9f62fa..08c64c0ba05 100644 --- a/cuda/base/memory.cpp +++ b/cuda/base/memory.cpp @@ -82,7 +82,7 @@ namespace gko { #endif -void* CudaAllocator::allocate(size_type num_bytes) const +void* CudaAllocator::allocate(size_type num_bytes) { void* ptr{}; GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes), @@ -91,7 +91,7 @@ void* CudaAllocator::allocate(size_type num_bytes) const } -void CudaAllocator::deallocate(void* ptr) const +void CudaAllocator::deallocate(void* ptr) { GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr)); } @@ -103,7 +103,7 @@ void CudaAllocator::deallocate(void* ptr) const CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {} -void* CudaAsyncAllocator::allocate(size_type num_bytes) const +void* CudaAsyncAllocator::allocate(size_type num_bytes) { void* ptr{}; GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS( @@ -112,7 +112,7 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const } -void CudaAsyncAllocator::deallocate(void* ptr) const +void CudaAsyncAllocator::deallocate(void* ptr) { GKO_EXIT_ON_CUDA_ERROR(cudaFreeAsync(ptr, stream_)); } @@ -121,10 +121,10 @@ void CudaAsyncAllocator::deallocate(void* ptr) const #else // Fall back to regular allocation -CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{} {} +CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {} -void* CudaAsyncAllocator::allocate(size_type num_bytes) const +void* CudaAsyncAllocator::allocate(size_type num_bytes) { void* ptr{}; GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes), @@ -133,7 +133,7 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const } -void CudaAsyncAllocator::deallocate(void* ptr) const +void CudaAsyncAllocator::deallocate(void* ptr) { GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr)); } @@ -142,6 +142,13 @@ void CudaAsyncAllocator::deallocate(void* ptr) const #endif +bool CudaAsyncAllocator::check_environment(int device_id, + CUstream_st* stream) const +{ + return stream == stream_; +} + + CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id) : CudaUnifiedAllocator{device_id, cudaMemAttachGlobal} {} @@ -152,7 +159,7 @@ CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags) {} -void* CudaUnifiedAllocator::allocate(size_type num_bytes) const +void* CudaUnifiedAllocator::allocate(size_type num_bytes) { // we need to set the device ID in case this gets used in a host executor detail::cuda_scoped_device_id_guard g(device_id_); @@ -163,7 +170,7 @@ void* CudaUnifiedAllocator::allocate(size_type num_bytes) const } -void CudaUnifiedAllocator::deallocate(void* ptr) const +void CudaUnifiedAllocator::deallocate(void* ptr) { // we need to set the device ID in case this gets used in a host executor detail::cuda_scoped_device_id_guard g(device_id_); @@ -171,10 +178,17 @@ void CudaUnifiedAllocator::deallocate(void* ptr) const } +bool CudaUnifiedAllocator::check_environment(int device_id, + CUstream_st* stream) const +{ + return device_id == device_id_; +} + + CudaHostAllocator::CudaHostAllocator(int device_id) : device_id_{device_id} {} -void* CudaHostAllocator::allocate(size_type 
num_bytes) const +void* CudaHostAllocator::allocate(size_type num_bytes) { // we need to set the device ID in case this gets used in a host executor detail::cuda_scoped_device_id_guard g(device_id_); @@ -185,7 +199,7 @@ void* CudaHostAllocator::allocate(size_type num_bytes) const } -void CudaHostAllocator::deallocate(void* ptr) const +void CudaHostAllocator::deallocate(void* ptr) { // we need to set the device ID in case this gets used in a host executor detail::cuda_scoped_device_id_guard g(device_id_); @@ -193,4 +207,11 @@ void CudaHostAllocator::deallocate(void* ptr) const } +bool CudaHostAllocator::check_environment(int device_id, + CUstream_st* stream) const +{ + return device_id == device_id_; +} + + } // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 55763ca5525..31b5e0543ba 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -10,7 +10,6 @@ target_sources(ginkgo_dpcpp base/executor.dp.cpp base/helper.dp.cpp base/index_set_kernels.dp.cpp - base/memory.dp.cpp base/scoped_device_id.dp.cpp base/timer.dp.cpp base/version.dp.cpp diff --git a/dpcpp/base/memory.dp.cpp b/dpcpp/base/memory.dp.cpp deleted file mode 100644 index 2582fa331a0..00000000000 --- a/dpcpp/base/memory.dp.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
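// With the environment check in CudaExecutor::create above, an allocator that
// is tied to a specific stream or device has to match the executor it is
// handed to. A minimal construction sketch, assuming a CUDA build and device 0
// (it mirrors the updated cuda_executor test further down):

#include <memory>

#include <ginkgo/ginkgo.hpp>

int main()
{
    auto ref = gko::ReferenceExecutor::create();
    gko::cuda_stream stream{0};  // custom stream on device 0
    // allocator and executor refer to the same stream, so check_environment
    // passes; an allocator created for a different stream makes create() throw
    auto exec = gko::CudaExecutor::create(
        0, ref, std::make_shared<gko::CudaAsyncAllocator>(stream.get()),
        stream.get());
    gko::array<int> data{exec, {1, 2}};  // allocated through the async allocator
}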
-*************************************************************/ - -#include - - -#include - - -namespace gko { - - -DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue* queue) : queue_{queue} {} - - -void* DpcppAllocatorBase::allocate(size_type num_bytes) const -{ - return this->allocate_impl(queue_, num_bytes); -} - - -void DpcppAllocatorBase::deallocate(void* ptr) const -{ - this->deallocate_impl(queue_, ptr); -} - - -void* DpcppAllocator::allocate_impl(sycl::queue* queue, - size_type num_bytes) const -{ - return sycl::malloc_device(num_bytes, *queue); -} - - -void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const -{ - queue->wait_and_throw(); - sycl::free(ptr, queue->get_context()); -} - - -void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue, - size_type num_bytes) -{ - return sycl::malloc_shared(num_bytes, *queue); -} - - -void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) -{ - queue->wait_and_throw(); - sycl::free(ptr, queue->get_context()); -} - - -} // namespace gko diff --git a/dpcpp/test/base/CMakeLists.txt b/dpcpp/test/base/CMakeLists.txt index 5c0ca601f04..bb9c8a75050 100644 --- a/dpcpp/test/base/CMakeLists.txt +++ b/dpcpp/test/base/CMakeLists.txt @@ -3,4 +3,3 @@ ginkgo_create_dpcpp_test(dim3) ginkgo_create_dpcpp_test(kernel_launch) # set correct flags for kernel_launch.hpp target_compile_definitions(dpcpp_test_base_kernel_launch PRIVATE GKO_COMPILING_DPCPP) -ginkgo_create_dpcpp_test(memory) \ No newline at end of file diff --git a/dpcpp/test/base/memory.dp.cpp b/dpcpp/test/base/memory.dp.cpp deleted file mode 100644 index e587660cde3..00000000000 --- a/dpcpp/test/base/memory.dp.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include -#include - - -#include - - -#include -#include -#include - - -#include "dpcpp/test/utils.hpp" - - -namespace { - - -class Memory : public ::testing::Test { -protected: - Memory() - : exec{gko::DpcppExecutor::create(0, gko::OmpExecutor::create())}, - host_exec_with_unified{gko::OmpExecutor::create( - std::make_shared(exec->get_queue()))}, - exec_with_unified{gko::DpcppExecutor::create( - exec->get_queue(), host_exec_with_unified, - std::make_shared(exec->get_queue()))} - {} - - std::shared_ptr exec; - std::shared_ptr host_exec_with_unified; - std::shared_ptr exec_with_unified; -}; - - -TEST_F(Memory, DeviceAllocationWorks) -{ - gko::array data{exec, {1, 2}}; - - GKO_ASSERT_ARRAY_EQ(data, I({1, 2})); -} - - -TEST_F(Memory, UnifiedDeviceAllocationWorks) -{ - gko::array data{exec_with_unified, {1, 2}}; - exec->synchronize(); - - ASSERT_EQ(data.get_const_data()[0], 1); - ASSERT_EQ(data.get_const_data()[1], 2); -} - - -TEST_F(Memory, HostUnifiedAllocationWorks) -{ - gko::array data{host_exec_with_unified, {1, 2}}; - - ASSERT_EQ(data.get_const_data()[0], 1); - ASSERT_EQ(data.get_const_data()[1], 2); -} - - -} // namespace diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 6b4b0fd5ddc..2df5c9a4847 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -60,9 +60,8 @@ std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, hipStream_t stream) { - return std::shared_ptr( - new HipExecutor(device_id, std::move(master), - std::make_shared(), stream)); + return create(device_id, std::move(master), + std::make_shared(), stream); } @@ -70,6 +69,10 @@ std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, std::shared_ptr alloc, hipStream_t stream) { + if (!alloc->check_environment(device_id, stream)) { + throw Error{__FILE__, __LINE__, + "Allocator uses incorrect stream or device ID."}; + } return std::shared_ptr(new HipExecutor( device_id, std::move(master), std::move(alloc), stream)); } diff --git a/hip/base/memory.hip.cpp b/hip/base/memory.hip.cpp index f2a8977525f..7acb208173a 100644 --- a/hip/base/memory.hip.cpp +++ b/hip/base/memory.hip.cpp @@ -39,6 +39,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "hip/base/scoped_device_id.hip.hpp" + + namespace gko { @@ -79,7 +82,7 @@ namespace gko { #endif -void* HipAllocator::allocate(size_type num_bytes) const +void* HipAllocator::allocate(size_type num_bytes) { void* dev_ptr{}; GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(hipMalloc(&dev_ptr, num_bytes), @@ -88,10 +91,126 @@ void* HipAllocator::allocate(size_type num_bytes) const } -void HipAllocator::deallocate(void* dev_ptr) const +void HipAllocator::deallocate(void* dev_ptr) { GKO_EXIT_ON_HIP_ERROR(hipFree(dev_ptr)); } +#if HIP_VERSION_MAJOR >= 5 + + +HipAsyncAllocator::HipAsyncAllocator(hipStream_t stream) : stream_{stream} {} + + +void* HipAsyncAllocator::allocate(size_type num_bytes) +{ + void* ptr{}; + GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS( + hipMallocAsync(&ptr, num_bytes, stream_), num_bytes); + return ptr; +} + + +void HipAsyncAllocator::deallocate(void* ptr) +{ + GKO_EXIT_ON_HIP_ERROR(hipFreeAsync(ptr, stream_)); +} + + +#else // Fall back to regular allocation + + +HipAsyncAllocator::HipAsyncAllocator(hipStream_t stream) : stream_{stream} {} + + +void* HipAsyncAllocator::allocate(size_type num_bytes) +{ + void* ptr{}; + GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(hipMalloc(&ptr, num_bytes), num_bytes); + return ptr; +} + + +void HipAsyncAllocator::deallocate(void* ptr) +{ + GKO_EXIT_ON_HIP_ERROR(hipFree(ptr)); +} + + +#endif + + +bool HipAsyncAllocator::check_environment(int device_id, + hipStream_t stream) const +{ + return stream == stream_; +} + + +HipUnifiedAllocator::HipUnifiedAllocator(int device_id) + : HipUnifiedAllocator{device_id, hipMemAttachGlobal} +{} + + +HipUnifiedAllocator::HipUnifiedAllocator(int device_id, unsigned int flags) + : device_id_{device_id}, flags_{flags} +{} + + +void* HipUnifiedAllocator::allocate(size_type num_bytes) +{ + // we need to set the device ID in case this gets used in a host executor + detail::hip_scoped_device_id_guard g(device_id_); + void* ptr{}; + GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS( + hipMallocManaged(&ptr, num_bytes, flags_), num_bytes); + return ptr; +} + + +void HipUnifiedAllocator::deallocate(void* ptr) +{ + // we need to set the device ID in case this gets used in a host executor + detail::hip_scoped_device_id_guard g(device_id_); + GKO_EXIT_ON_HIP_ERROR(hipFree(ptr)); +} + + +bool HipUnifiedAllocator::check_environment(int device_id, + hipStream_t stream) const +{ + return device_id == device_id_; +} + + +HipHostAllocator::HipHostAllocator(int device_id) : device_id_{device_id} {} + + +void* HipHostAllocator::allocate(size_type num_bytes) +{ + // we need to set the device ID in case this gets used in a host executor + detail::hip_scoped_device_id_guard g(device_id_); + void* ptr{}; + GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(hipHostMalloc(&ptr, num_bytes), + num_bytes); + return ptr; +} + + +void HipHostAllocator::deallocate(void* ptr) +{ + // we need to set the device ID in case this gets used in a host executor + detail::hip_scoped_device_id_guard g(device_id_); + GKO_EXIT_ON_HIP_ERROR(hipFreeHost(ptr)); +} + + +bool HipHostAllocator::check_environment(int device_id, + hipStream_t stream) const +{ + return device_id == device_id_; +} + + } // namespace gko diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 4545b216f86..f033873e392 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1717,10 +1717,13 @@ class HipExecutor : public detail::ExecutorBase, * @param alloc_mode the allocation mode that the executor should operate * on. 
See @allocation_mode for more details */ - [[deprecated("")]] static std::shared_ptr create( - int device_id, std::shared_ptr master, bool device_reset, - allocation_mode alloc_mode = default_hip_alloc_mode, - GKO_HIP_STREAM_STRUCT* stream = nullptr); + [[deprecated( + "device_reset is deprecated entirely, call hipDeviceReset directly. " + "alloc_mode was replaced by the Allocator type " + "hierarchy.")]] static std::shared_ptr + create(int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode = default_hip_alloc_mode, + GKO_HIP_STREAM_STRUCT* stream = nullptr); static std::shared_ptr create( int device_id, std::shared_ptr master, @@ -1914,28 +1917,6 @@ class DpcppExecutor : public detail::ExecutorBase, std::string device_type = "all", dpcpp_queue_property property = dpcpp_queue_property::in_order); - /** - * Creates a new DpcppExecutor from an existing SYCL queue. - * - * @param queue the DPCPP device id of this device - * @param master an executor on the host that is used to invoke the device - * kernels - */ - static std::shared_ptr create( - sycl::queue* queue, std::shared_ptr master); - - /** - * Creates a new DpcppExecutor from an existing SYCL queue. - * - * @param queue the DPCPP device id of this device - * @param master an executor on the host that is used to invoke the device - * kernels - * @param alloc the allocator used for memory allocation - */ - static std::shared_ptr create( - sycl::queue* queue, std::shared_ptr master, - std::shared_ptr alloc); - std::shared_ptr get_master() noexcept override; std::shared_ptr get_master() const noexcept override; diff --git a/include/ginkgo/core/base/memory.hpp b/include/ginkgo/core/base/memory.hpp index 872a25a9a33..1086c9aacb4 100644 --- a/include/ginkgo/core/base/memory.hpp +++ b/include/ginkgo/core/base/memory.hpp @@ -49,9 +49,9 @@ class Allocator { public: virtual ~Allocator() = default; - virtual void* allocate(size_type num_bytes) const = 0; + virtual void* allocate(size_type num_bytes) = 0; - virtual void deallocate(void* ptr) const = 0; + virtual void deallocate(void* ptr) = 0; }; @@ -65,34 +65,49 @@ class CpuAllocatorBase : public Allocator {}; /** * Implement this interface to provide an allocator for CudaExecutor. */ -class CudaAllocatorBase : public Allocator {}; +class CudaAllocatorBase : public Allocator { + friend class CudaExecutor; - -/** - * Implement this interface to provide an allocator for HipExecutor. - */ -class HipAllocatorBase : public Allocator {}; +protected: + /** + * Checks if the allocator can be used safely with the provided device ID + * and stream. The check is necessary to ensure safe usage of stream-ordered + * allocators and unified shared memory allocators. + * + * @param device_id the device ID the allocator will be used in. + * @param stream the stream the allocator will be used with. + * @return true if and only if the allocator can be used by CudaExecutor in + * the given environment. + */ + virtual bool check_environment(int device_id, CUstream_st* stream) const + { + return true; + } +}; /** - * Implement this interface to provide an allocator for DpcppExecutor. + * Implement this interface to provide an allocator for HipExecutor. 
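// check_environment is the hook through which a user-provided device allocator
// can reject executors it cannot serve. A sketch of a device-pinned CUDA
// allocator built on the interface above (the DevicePinnedAllocator name and
// the omitted error handling are illustrative, not part of this patch):

#include <cuda_runtime.h>

#include <ginkgo/core/base/memory.hpp>

class DevicePinnedAllocator : public gko::CudaAllocatorBase {
public:
    explicit DevicePinnedAllocator(int device_id) : device_id_{device_id} {}

    void* allocate(gko::size_type num_bytes) override
    {
        void* ptr{};
        cudaSetDevice(device_id_);  // error handling omitted for brevity
        cudaMalloc(&ptr, num_bytes);
        return ptr;
    }

    void deallocate(void* ptr) override
    {
        cudaSetDevice(device_id_);
        cudaFree(ptr);
    }

protected:
    // only executors on the matching device may use this allocator
    bool check_environment(int device_id, CUstream_st* stream) const override
    {
        return device_id == device_id_;
    }

private:
    int device_id_;
};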
*/ -class DpcppAllocatorBase : public Allocator { -public: - DpcppAllocatorBase(sycl::queue* queue); - - void* allocate(size_type num_bytes) const final; - - void deallocate(void* ptr) const final; +class HipAllocatorBase : public Allocator { + friend class HipExecutor; protected: - virtual void* allocate_impl(sycl::queue* queue, - size_type num_bytes) const = 0; - - virtual void deallocate_impl(sycl::queue* queue, void* ptr) const = 0; - -private: - sycl::queue* queue_; + /** + * Checks if the allocator can be used safely with the provided device ID + * and stream. The check is necessary to ensure safe usage of stream-ordered + * allocators and unified shared memory allocators. + * + * @param device_id the device ID the allocator will be used in. + * @param stream the stream the allocator will be used with. + * @return true if and only if the allocator can be used by CudaExecutor in + * the given environment. + */ + virtual bool check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const + { + return true; + } }; @@ -101,9 +116,9 @@ class DpcppAllocatorBase : public Allocator { */ class CpuAllocator : public CpuAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; }; @@ -112,9 +127,9 @@ class CpuAllocator : public CpuAllocatorBase { */ class CudaAllocator : public CudaAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; }; @@ -123,12 +138,14 @@ class CudaAllocator : public CudaAllocatorBase { */ class CudaAsyncAllocator : public CudaAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; CudaAsyncAllocator(CUstream_st* stream); + bool check_environment(int device_id, CUstream_st* stream) const override; + private: CUstream_st* stream_; }; @@ -139,14 +156,17 @@ class CudaAsyncAllocator : public CudaAllocatorBase { */ class CudaUnifiedAllocator : public CudaAllocatorBase, public CpuAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; CudaUnifiedAllocator(int device_id); CudaUnifiedAllocator(int device_id, unsigned int flags); +protected: + bool check_environment(int device_id, CUstream_st* stream) const override; + private: int device_id_; unsigned int flags_; @@ -154,16 +174,19 @@ class CudaUnifiedAllocator : public CudaAllocatorBase, public CpuAllocatorBase { /* - * Allocator using cudaMallocHost. + * Allocator using cudaHostMalloc. 
*/ class CudaHostAllocator : public CudaAllocatorBase, public CpuAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; CudaHostAllocator(int device_id); +protected: + bool check_environment(int device_id, CUstream_st* stream) const override; + private: int device_id_; }; @@ -174,38 +197,72 @@ class CudaHostAllocator : public CudaAllocatorBase, public CpuAllocatorBase { */ class HipAllocator : public HipAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; + + void deallocate(void* ptr) override; +}; + + +/* + * Allocator using hipMallocAsync. + */ +class HipAsyncAllocator : public HipAllocatorBase { +public: + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; + + HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream); + +protected: + bool check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const override; + +private: + GKO_HIP_STREAM_STRUCT* stream_; }; /* - * Allocator using sycl::malloc_device. + * Allocator using hipMallocManaged */ -class DpcppAllocator : public DpcppAllocatorBase { +class HipUnifiedAllocator : public HipAllocatorBase, public CpuAllocatorBase { public: - using DpcppAllocatorBase::DpcppAllocatorBase; + void* allocate(size_type num_bytes) override; + + void deallocate(void* ptr) override; + + HipUnifiedAllocator(int device_id); + + HipUnifiedAllocator(int device_id, unsigned int flags); protected: - void* allocate_impl(sycl::queue* queue, size_type num_bytes) const override; + bool check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const override; - void deallocate_impl(sycl::queue* queue, void* ptr) const override; +private: + int device_id_; + unsigned int flags_; }; /* - * Allocator using sycl::malloc_shared. + * Allocator using hipHostAlloc. */ -class DpcppUnifiedAllocator : public DpcppAllocatorBase, - public CpuAllocatorBase { +class HipHostAllocator : public HipAllocatorBase, public CpuAllocatorBase { public: - using DpcppAllocatorBase::DpcppAllocatorBase; + void* allocate(size_type num_bytes) override; + + void deallocate(void* ptr) override; + + HipHostAllocator(int device_id); protected: - void* allocate_impl(sycl::queue* queue, size_type num_bytes) const override; + bool check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const override; - void deallocate_impl(sycl::queue* queue, void* ptr) const override; +private: + int device_id_; }; From 7a7960774b3e8bc5e6461357f4360cd5fdb2373b Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 7 Jul 2023 11:49:57 +0200 Subject: [PATCH 056/583] add HIP allocator tests --- benchmark/utils/general.hpp | 9 ++- hip/base/memory.hip.cpp | 2 +- hip/test/base/CMakeLists.txt | 1 + hip/test/base/memory.hip.cpp | 126 +++++++++++++++++++++++++++++++++++ 4 files changed, 132 insertions(+), 6 deletions(-) create mode 100644 hip/test/base/memory.hip.cpp diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 35077f66d4b..19c71b74a1a 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -368,16 +368,15 @@ const std::map****************************** +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include +#include +#include + + +#include "hip/test/utils.hip.hpp" + + +namespace { + + +class Memory : public HipTestFixture { +protected: + Memory() + : host_exec_with_pinned{gko::OmpExecutor::create( + std::make_shared(0))}, + host_exec_with_unified{gko::OmpExecutor::create( + std::make_shared(0))}, + exec_with_normal{gko::HipExecutor::create( + 0, ref, std::make_shared(), + exec->get_stream())}, + exec_with_async{gko::HipExecutor::create( + 0, host_exec_with_pinned, + std::make_shared(exec->get_stream()), + exec->get_stream())}, + exec_with_unified{gko::HipExecutor::create( + 0, host_exec_with_unified, + std::make_shared(0), + exec->get_stream())} + {} + + std::shared_ptr host_exec_with_pinned; + std::shared_ptr host_exec_with_unified; + std::shared_ptr exec_with_normal; + std::shared_ptr exec_with_async; + std::shared_ptr exec_with_unified; +}; + + +TEST_F(Memory, DeviceAllocationWorks) +{ + gko::array data{exec_with_normal, {1, 2}}; + + GKO_ASSERT_ARRAY_EQ(data, I({1, 2})); +} + + +TEST_F(Memory, AsyncDeviceAllocationWorks) +{ + gko::array data{exec_with_async, {1, 2}}; + + GKO_ASSERT_ARRAY_EQ(data, I({1, 2})); +} + + +TEST_F(Memory, UnifiedDeviceAllocationWorks) +{ + gko::array data{exec_with_unified, {1, 2}}; + exec->synchronize(); + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +TEST_F(Memory, HostUnifiedAllocationWorks) +{ + gko::array data{host_exec_with_unified, {1, 2}}; + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +TEST_F(Memory, HostPinnedAllocationWorks) +{ + gko::array data{host_exec_with_pinned, {1, 2}}; + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +} // namespace From 5e4881a2ca0498350b8ed9dbdbc287c48d2e2e95 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 7 Jul 2023 13:22:33 +0200 Subject: [PATCH 057/583] review updates * honor allocation_mode for HIP * use correct allocation 
flags for cudaMallocManaged allocation_mode * use valid device_id in moved-from stream wrapper * add more deprecation warnings for device_reset functionality * documentation Co-authored-by: Yuhsiang M. Tsai Co-authored-by: Marcel Koch --- cuda/base/executor.cpp | 6 ++++-- cuda/base/stream.cpp | 4 ++-- hip/base/executor.hip.cpp | 20 ++++++++++++++++++- hip/base/roctx.hip.cpp | 2 ++ hip/base/stream.hip.cpp | 4 ++-- include/ginkgo/core/base/executor.hpp | 28 +++++++++++++++++++++++++-- include/ginkgo/core/base/stream.hpp | 12 ++++++++++-- 7 files changed, 65 insertions(+), 11 deletions(-) diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index faf90037a0f..fd16815456a 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -67,9 +67,11 @@ std::unique_ptr allocator_from_mode(int device_id, case allocation_mode::device: return std::make_unique(); case allocation_mode::unified_global: - return std::make_unique(device_id); + return std::make_unique(device_id, + cudaMemAttachGlobal); case allocation_mode::unified_host: - return std::make_unique(device_id); + return std::make_unique(device_id, + cudaMemAttachHost); default: GKO_NOT_SUPPORTED(mode); } diff --git a/cuda/base/stream.cpp b/cuda/base/stream.cpp index 0bbc9b1cc83..76027bd51e2 100644 --- a/cuda/base/stream.cpp +++ b/cuda/base/stream.cpp @@ -45,7 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -cuda_stream::cuda_stream() : stream_{}, device_id_{-1} {} +cuda_stream::cuda_stream() : stream_{nullptr}, device_id_{} {} cuda_stream::cuda_stream(int device_id) : stream_{}, device_id_(device_id) @@ -66,7 +66,7 @@ cuda_stream::~cuda_stream() cuda_stream::cuda_stream(cuda_stream&& other) : stream_{std::exchange(other.stream_, nullptr)}, - device_id_(std::exchange(other.device_id_, -1)) + device_id_(std::exchange(other.device_id_, 0)) {} diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 2df5c9a4847..a89e765becb 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -56,12 +56,30 @@ namespace gko { #include "common/cuda_hip/base/executor.hpp.inc" +std::unique_ptr allocator_from_mode(int device_id, + allocation_mode mode) +{ + switch (mode) { + case allocation_mode::device: + return std::make_unique(); + case allocation_mode::unified_global: + return std::make_unique(device_id, + hipMemAttachGlobal); + case allocation_mode::unified_host: + return std::make_unique(device_id, + hipMemAttachHost); + default: + GKO_NOT_SUPPORTED(mode); + } +} + + std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, hipStream_t stream) { return create(device_id, std::move(master), - std::make_shared(), stream); + allocator_from_mode(device_id, alloc_mode), stream); } diff --git a/hip/base/roctx.hip.cpp b/hip/base/roctx.hip.cpp index a01bc11dc47..23b07e60254 100644 --- a/hip/base/roctx.hip.cpp +++ b/hip/base/roctx.hip.cpp @@ -56,6 +56,7 @@ void begin_roctx(const char* name, profile_event_category) roctxRangePush(name); } + void end_roctx(const char*, profile_event_category) { roctxRangePop(); } #else @@ -63,6 +64,7 @@ void end_roctx(const char*, profile_event_category) { roctxRangePop(); } void begin_roctx(const char* name, profile_event_category) GKO_NOT_COMPILED(roctx); + void end_roctx(const char*, profile_event_category) GKO_NOT_COMPILED(roctx); #endif diff --git a/hip/base/stream.hip.cpp b/hip/base/stream.hip.cpp index e5817eb9ebd..dc2d99b8b17 100644 --- a/hip/base/stream.hip.cpp +++ 
b/hip/base/stream.hip.cpp @@ -47,7 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -hip_stream::hip_stream() : stream_{}, device_id_{-1} {} +hip_stream::hip_stream() : stream_{}, device_id_{} {} hip_stream::hip_stream(int device_id) : stream_{}, device_id_(device_id) @@ -68,7 +68,7 @@ hip_stream::~hip_stream() hip_stream::hip_stream(hip_stream&& other) : stream_{std::exchange(other.stream_, nullptr)}, - device_id_{std::exchange(other.device_id_, -1)} + device_id_{std::exchange(other.device_id_, 0)} {} diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index f033873e392..4f476b9286d 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1307,14 +1307,38 @@ class EnableDeviceReset { * * @param device_reset whether to allow a device reset or not */ - void set_device_reset(bool device_reset) {} + [[deprecated( + "device_reset is no longer supported, call " + "cudaDeviceReset/hipDeviceReset manually")]] void + set_device_reset(bool device_reset) + {} /** * Returns the current status of the device reset boolean for this executor. * * @return the current status of the device reset boolean for this executor. */ - bool get_device_reset() { return false; } + [[deprecated( + "device_reset is no longer supported, call " + "cudaDeviceReset/hipDeviceReset manually")]] bool + get_device_reset() + { + return false; + } + +protected: + /** + * Instantiate an EnableDeviceReset class + * + * @param device_reset the starting device_reset status. Defaults to false. + */ + EnableDeviceReset() {} + + [[deprecated( + "device_reset is no longer supported, call " + "cudaDeviceReset/hipDeviceReset manually")]] EnableDeviceReset(bool + device_reset) + {} }; diff --git a/include/ginkgo/core/base/stream.hpp b/include/ginkgo/core/base/stream.hpp index 8ee8333e41a..f7d45f59c5a 100644 --- a/include/ginkgo/core/base/stream.hpp +++ b/include/ginkgo/core/base/stream.hpp @@ -50,7 +50,11 @@ class cuda_stream { /** Creates an empty stream wrapper, representing the default stream. */ cuda_stream(); - /** Creates a new custom CUDA stream. */ + /** + * Creates a new custom CUDA stream on the given device. + * + * @param device_id the device ID to create the stream on. + */ cuda_stream(int device_id); /** Destroys the custom CUDA stream, if it isn't empty. */ @@ -89,7 +93,11 @@ class hip_stream { /** Creates an empty stream wrapper, representing the default stream. */ hip_stream(); - /** Creates a new custom HIP stream. */ + /** + * Creates a new custom HIP stream on the given device. + * + * @param device_id the device ID to create the stream on. + */ hip_stream(int device_id); /** Destroys the custom HIP stream, if it isn't empty. 
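// set_device_reset/get_device_reset are now no-ops that only carry deprecation
// warnings, so a device reset becomes an explicit application-level call made
// after every Ginkgo object on that device is gone. A migration sketch,
// assuming the application previously enabled the executor-managed reset:

#include <cuda_runtime.h>

#include <ginkgo/ginkgo.hpp>

int main()
{
    {
        auto exec =
            gko::CudaExecutor::create(0, gko::ReferenceExecutor::create());
        // ... build and run solvers on exec ...
    }  // executor and all device allocations are released here
    cudaDeviceReset();  // formerly requested via exec->set_device_reset(true)
}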
*/ From 0954951f813e27d2e83023fcbae7940e68f65a74 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 10 Jul 2023 14:23:11 +0000 Subject: [PATCH 058/583] avoid creating OmpExecutor in tests --- core/test/base/executor.cpp | 54 ++++++++++----------- cuda/test/base/cuda_executor.cu | 28 +++++------ cuda/test/base/cuda_executor_topology.cu | 18 +++---- hip/test/base/hip_executor.hip.cpp | 28 +++++------ hip/test/base/hip_executor_topology.hip.cpp | 19 ++++---- 5 files changed, 74 insertions(+), 73 deletions(-) diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 13cba09e2b6..a331d8f3485 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -248,17 +248,17 @@ TEST(ReferenceExecutor, IsItsOwnMaster) TEST(CudaExecutor, KnowsItsMaster) { - auto omp = gko::OmpExecutor::create(); - exec_ptr cuda = gko::CudaExecutor::create(0, omp); + auto ref = gko::ReferenceExecutor::create(); + exec_ptr cuda = gko::CudaExecutor::create(0, ref); - ASSERT_EQ(omp, cuda->get_master()); + ASSERT_EQ(ref, cuda->get_master()); } TEST(CudaExecutor, KnowsItsDeviceId) { - auto omp = gko::OmpExecutor::create(); - auto cuda = gko::CudaExecutor::create(0, omp); + auto ref = gko::ReferenceExecutor::create(); + auto cuda = gko::CudaExecutor::create(0, ref); ASSERT_EQ(0, cuda->get_device_id()); } @@ -266,17 +266,17 @@ TEST(CudaExecutor, KnowsItsDeviceId) TEST(HipExecutor, KnowsItsMaster) { - auto omp = gko::OmpExecutor::create(); - exec_ptr hip = gko::HipExecutor::create(0, omp); + auto ref = gko::ReferenceExecutor::create(); + exec_ptr hip = gko::HipExecutor::create(0, ref); - ASSERT_EQ(omp, hip->get_master()); + ASSERT_EQ(ref, hip->get_master()); } TEST(HipExecutor, KnowsItsDeviceId) { - auto omp = gko::OmpExecutor::create(); - auto hip = gko::HipExecutor::create(0, omp); + auto ref = gko::ReferenceExecutor::create(); + auto hip = gko::HipExecutor::create(0, ref); ASSERT_EQ(0, hip->get_device_id()); } @@ -284,17 +284,17 @@ TEST(HipExecutor, KnowsItsDeviceId) TEST(DpcppExecutor, KnowsItsMaster) { - auto omp = gko::OmpExecutor::create(); - exec_ptr dpcpp = gko::DpcppExecutor::create(0, omp); + auto ref = gko::ReferenceExecutor::create(); + exec_ptr dpcpp = gko::DpcppExecutor::create(0, ref); - ASSERT_EQ(omp, dpcpp->get_master()); + ASSERT_EQ(ref, dpcpp->get_master()); } TEST(DpcppExecutor, KnowsItsDeviceId) { - auto omp = gko::OmpExecutor::create(); - auto dpcpp = gko::DpcppExecutor::create(0, omp); + auto ref = gko::ReferenceExecutor::create(); + auto dpcpp = gko::DpcppExecutor::create(0, ref); ASSERT_EQ(0, dpcpp->get_device_id()); } @@ -304,13 +304,13 @@ TEST(Executor, CanVerifyMemory) { auto ref = gko::ReferenceExecutor::create(); auto omp = gko::OmpExecutor::create(); - auto hip = gko::HipExecutor::create(0, omp); - auto cuda = gko::CudaExecutor::create(0, omp); + auto hip = gko::HipExecutor::create(0, ref); + auto cuda = gko::CudaExecutor::create(0, ref); auto omp2 = gko::OmpExecutor::create(); - auto hip2 = gko::HipExecutor::create(0, omp); - auto cuda2 = gko::CudaExecutor::create(0, omp); - auto hip_1 = gko::HipExecutor::create(1, omp); - auto cuda_1 = gko::CudaExecutor::create(1, omp); + auto hip2 = gko::HipExecutor::create(0, ref); + auto cuda2 = gko::CudaExecutor::create(0, ref); + auto hip_1 = gko::HipExecutor::create(1, ref); + auto cuda_1 = gko::CudaExecutor::create(1, ref); std::shared_ptr host_dpcpp; std::shared_ptr cpu_dpcpp; std::shared_ptr gpu_dpcpp; @@ -318,16 +318,16 @@ TEST(Executor, CanVerifyMemory) std::shared_ptr cpu_dpcpp_dup; std::shared_ptr 
gpu_dpcpp_dup; if (gko::DpcppExecutor::get_num_devices("host")) { - host_dpcpp = gko::DpcppExecutor::create(0, omp, "host"); - host_dpcpp_dup = gko::DpcppExecutor::create(0, omp, "host"); + host_dpcpp = gko::DpcppExecutor::create(0, ref, "host"); + host_dpcpp_dup = gko::DpcppExecutor::create(0, ref, "host"); } if (gko::DpcppExecutor::get_num_devices("cpu")) { - cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu"); - cpu_dpcpp_dup = gko::DpcppExecutor::create(0, omp, "cpu"); + cpu_dpcpp = gko::DpcppExecutor::create(0, ref, "cpu"); + cpu_dpcpp_dup = gko::DpcppExecutor::create(0, ref, "cpu"); } if (gko::DpcppExecutor::get_num_devices("gpu")) { - gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu"); - gpu_dpcpp_dup = gko::DpcppExecutor::create(0, omp, "gpu"); + gpu_dpcpp = gko::DpcppExecutor::create(0, ref, "gpu"); + gpu_dpcpp_dup = gko::DpcppExecutor::create(0, ref, "gpu"); } ASSERT_EQ(false, ref->memory_accessible(omp)); diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu index c81799e0dae..83cfd1827ad 100644 --- a/cuda/test/base/cuda_executor.cu +++ b/cuda/test/base/cuda_executor.cu @@ -93,7 +93,7 @@ protected: stream(0), other_stream(gko::CudaExecutor::get_num_devices() - 1), #endif - omp(gko::OmpExecutor::create()), + ref(gko::ReferenceExecutor::create()), cuda(nullptr), cuda2(nullptr), cuda3(nullptr) @@ -104,19 +104,19 @@ protected: ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); #ifdef GKO_TEST_NONDEFAULT_STREAM cuda = gko::CudaExecutor::create( - 0, omp, std::make_shared(), stream.get()); + 0, ref, std::make_shared(), stream.get()); cuda2 = gko::CudaExecutor::create( - gko::CudaExecutor::get_num_devices() - 1, omp, + gko::CudaExecutor::get_num_devices() - 1, ref, std::make_shared(), other_stream.get()); cuda3 = gko::CudaExecutor::create( - 0, omp, std::make_shared(0), + 0, ref, std::make_shared(0), stream.get()); #else - cuda = gko::CudaExecutor::create(0, omp); + cuda = gko::CudaExecutor::create(0, ref); cuda2 = gko::CudaExecutor::create( - gko::CudaExecutor::get_num_devices() - 1, omp); + gko::CudaExecutor::get_num_devices() - 1, ref); cuda3 = gko::CudaExecutor::create( - 0, omp, std::make_shared(0)); + 0, ref, std::make_shared(0)); #endif } @@ -132,7 +132,7 @@ protected: gko::cuda_stream stream; gko::cuda_stream other_stream; #endif - std::shared_ptr omp; + std::shared_ptr ref; std::shared_ptr cuda; std::shared_ptr cuda2; std::shared_ptr cuda3; @@ -141,8 +141,8 @@ protected: TEST_F(CudaExecutor, CanInstantiateTwoExecutorsOnOneDevice) { - auto cuda = gko::CudaExecutor::create(0, omp); - auto cuda2 = gko::CudaExecutor::create(0, omp); + auto cuda = gko::CudaExecutor::create(0, ref); + auto cuda2 = gko::CudaExecutor::create(0, ref); // We want automatic deinitialization to not create any error } @@ -197,7 +197,7 @@ TEST_F(CudaExecutor, CopiesDataToCuda) int orig[] = {3, 8}; auto* copy = cuda->alloc(2); - cuda->copy_from(omp, 2, orig, copy); + cuda->copy_from(ref, 2, orig, copy); check_data<<<1, 1, 0, cuda->get_stream()>>>(copy); ASSERT_NO_THROW(cuda->synchronize()); @@ -218,7 +218,7 @@ TEST_F(CudaExecutor, CanAllocateOnUnifiedMemory) int orig[] = {3, 8}; auto* copy = cuda3->alloc(2); - cuda3->copy_from(omp, 2, orig, copy); + cuda3->copy_from(ref, 2, orig, copy); check_data<<<1, 1, 0, cuda3->get_stream()>>>(copy); ASSERT_NO_THROW(cuda3->synchronize()); @@ -240,7 +240,7 @@ TEST_F(CudaExecutor, CopiesDataFromCuda) auto orig = cuda->alloc(2); init_data<<<1, 1, 0, cuda->get_stream()>>>(orig); - omp->copy_from(cuda, 2, orig, copy); + ref->copy_from(cuda, 2, 
orig, copy); EXPECT_EQ(3, copy[0]); ASSERT_EQ(8, copy[1]); @@ -293,7 +293,7 @@ TEST_F(CudaExecutor, CopiesDataFromCudaToCuda) cuda2->run(ExampleOperation(value)); ASSERT_EQ(value, cuda2->get_device_id()); // Put the results on OpenMP and run CPU side assertions - omp->copy_from(cuda2, 2, copy_cuda2, copy); + ref->copy_from(cuda2, 2, copy_cuda2, copy); EXPECT_EQ(3, copy[0]); ASSERT_EQ(8, copy[1]); cuda2->free(copy_cuda2); diff --git a/cuda/test/base/cuda_executor_topology.cu b/cuda/test/base/cuda_executor_topology.cu index a0ee6826ded..3b91cc7941a 100644 --- a/cuda/test/base/cuda_executor_topology.cu +++ b/cuda/test/base/cuda_executor_topology.cu @@ -60,15 +60,15 @@ namespace { class CudaExecutor : public ::testing::Test { protected: CudaExecutor() - : omp(gko::OmpExecutor::create()), cuda(nullptr), cuda2(nullptr) + : ref(gko::ReferenceExecutor::create()), cuda(nullptr), cuda2(nullptr) {} void SetUp() { ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); - cuda = gko::CudaExecutor::create(0, omp); + cuda = gko::CudaExecutor::create(0, ref); cuda2 = gko::CudaExecutor::create( - gko::CudaExecutor::get_num_devices() - 1, omp); + gko::CudaExecutor::get_num_devices() - 1, ref); } void TearDown() @@ -79,7 +79,7 @@ protected: } } - std::shared_ptr omp; + std::shared_ptr ref; std::shared_ptr cuda; std::shared_ptr cuda2; }; @@ -102,7 +102,7 @@ inline int get_core_os_id(int log_id) TEST_F(CudaExecutor, CanBindToSinglePu) { - cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + cuda = gko::CudaExecutor::create(0, gko::ReferenceExecutor::create()); const int bind_pu = 1; gko::machine_topology::get_instance()->bind_to_pu(bind_pu); @@ -114,7 +114,7 @@ TEST_F(CudaExecutor, CanBindToSinglePu) TEST_F(CudaExecutor, CanBindToPus) { - cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + cuda = gko::CudaExecutor::create(0, gko::ReferenceExecutor::create()); std::vector bind_pus = {1, 3}; gko::machine_topology::get_instance()->bind_to_pus(bind_pus); @@ -126,7 +126,7 @@ TEST_F(CudaExecutor, CanBindToPus) TEST_F(CudaExecutor, CanBindToCores) { - cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + cuda = gko::CudaExecutor::create(0, gko::ReferenceExecutor::create()); std::vector bind_cores = {1, 3}; gko::machine_topology::get_instance()->bind_to_cores(bind_cores); @@ -138,7 +138,7 @@ TEST_F(CudaExecutor, CanBindToCores) TEST_F(CudaExecutor, ClosestCpusIsPopulated) { - cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + cuda = gko::CudaExecutor::create(0, gko::ReferenceExecutor::create()); auto close_cpus = cuda->get_closest_pus(); if (close_cpus.size() == 0) { GTEST_SKIP(); @@ -150,7 +150,7 @@ TEST_F(CudaExecutor, ClosestCpusIsPopulated) TEST_F(CudaExecutor, KnowsItsNuma) { - cuda = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + cuda = gko::CudaExecutor::create(0, gko::ReferenceExecutor::create()); auto numa0 = cuda->get_closest_numa(); auto close_cpus = cuda->get_closest_pus(); if (close_cpus.size() == 0) { diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp index e531fa739e6..e63543ef77c 100644 --- a/hip/test/base/hip_executor.hip.cpp +++ b/hip/test/base/hip_executor.hip.cpp @@ -98,7 +98,7 @@ class HipExecutor : public ::testing::Test { stream(0), other_stream(gko::HipExecutor::get_num_devices() - 1), #endif - omp(gko::OmpExecutor::create()), + ref(gko::ReferenceExecutor::create()), hip(nullptr), hip2(nullptr), hip3(nullptr) @@ -109,17 +109,17 @@ class HipExecutor : public ::testing::Test { 
ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); #ifdef GKO_TEST_NONDEFAULT_STREAM hip = gko::HipExecutor::create( - 0, omp, std::make_shared(), stream.get()); + 0, ref, std::make_shared(), stream.get()); hip2 = gko::HipExecutor::create( - gko::HipExecutor::get_num_devices() - 1, omp, + gko::HipExecutor::get_num_devices() - 1, ref, std::make_shared(), other_stream.get()); hip3 = gko::HipExecutor::create( - 0, omp, std::make_shared(), stream.get()); + 0, ref, std::make_shared(), stream.get()); #else - hip = gko::HipExecutor::create(0, omp); + hip = gko::HipExecutor::create(0, ref); hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1, - omp); - hip3 = gko::HipExecutor::create(0, omp, + ref); + hip3 = gko::HipExecutor::create(0, ref, std::make_shared()); #endif } @@ -136,7 +136,7 @@ class HipExecutor : public ::testing::Test { gko::hip_stream stream; gko::hip_stream other_stream; #endif - std::shared_ptr omp; + std::shared_ptr ref; std::shared_ptr hip; std::shared_ptr hip2; std::shared_ptr hip3; @@ -145,8 +145,8 @@ class HipExecutor : public ::testing::Test { TEST_F(HipExecutor, CanInstantiateTwoExecutorsOnOneDevice) { - auto hip = gko::HipExecutor::create(0, omp); - auto hip2 = gko::HipExecutor::create(0, omp); + auto hip = gko::HipExecutor::create(0, ref); + auto hip2 = gko::HipExecutor::create(0, ref); // We want automatic deinitialization to not create any error } @@ -204,7 +204,7 @@ TEST_F(HipExecutor, CopiesDataToHip) int orig[] = {3, 8}; auto* copy = hip->alloc(2); - hip->copy_from(omp, 2, orig, copy); + hip->copy_from(ref, 2, orig, copy); check_data<<<1, 1, 0, hip->get_stream()>>>(copy); ASSERT_NO_THROW(hip->synchronize()); @@ -232,7 +232,7 @@ TEST_F(HipExecutor, CanAllocateOnUnifiedMemory) int orig[] = {3, 8}; auto* copy = hip3->alloc(2); - hip3->copy_from(omp, 2, orig, copy); + hip3->copy_from(ref, 2, orig, copy); check_data<<<1, 1, 0, hip3->get_stream()>>>(copy); ASSERT_NO_THROW(hip3->synchronize()); @@ -257,7 +257,7 @@ TEST_F(HipExecutor, CopiesDataFromHip) auto orig = hip->alloc(2); init_data<<<1, 1, 0, hip->get_stream()>>>(orig); - omp->copy_from(hip, 2, orig, copy); + ref->copy_from(hip, 2, orig, copy); EXPECT_EQ(3, copy[0]); ASSERT_EQ(8, copy[1]); @@ -310,7 +310,7 @@ TEST_F(HipExecutor, CopiesDataFromHipToHip) hip2->run(ExampleOperation(value)); ASSERT_EQ(value, hip2->get_device_id()); // Put the results on OpenMP and run CPU side assertions - omp->copy_from(hip2, 2, copy_hip2, copy); + ref->copy_from(hip2, 2, copy_hip2, copy); EXPECT_EQ(3, copy[0]); ASSERT_EQ(8, copy[1]); hip2->free(copy_hip2); diff --git a/hip/test/base/hip_executor_topology.hip.cpp b/hip/test/base/hip_executor_topology.hip.cpp index 394b2776319..3d6e3f2bddc 100644 --- a/hip/test/base/hip_executor_topology.hip.cpp +++ b/hip/test/base/hip_executor_topology.hip.cpp @@ -65,15 +65,16 @@ namespace { class HipExecutor : public ::testing::Test { protected: - HipExecutor() : omp(gko::OmpExecutor::create()), hip(nullptr), hip2(nullptr) + HipExecutor() + : ref(gko::ReferenceExecutor::create()), hip(nullptr), hip2(nullptr) {} void SetUp() { ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); - hip = gko::HipExecutor::create(0, omp); + hip = gko::HipExecutor::create(0, ref); hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1, - omp); + ref); } void TearDown() @@ -84,7 +85,7 @@ class HipExecutor : public ::testing::Test { } } - std::shared_ptr omp; + std::shared_ptr ref; std::shared_ptr hip; std::shared_ptr hip2; }; @@ -107,7 +108,7 @@ inline int get_core_os_id(int log_id) 
TEST_F(HipExecutor, CanBindToSinglePu) { - hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); + hip = gko::HipExecutor::create(0, gko::ReferenceExecutor::create()); const int bind_pu = 1; gko::machine_topology::get_instance()->bind_to_pu(bind_pu); @@ -119,7 +120,7 @@ TEST_F(HipExecutor, CanBindToSinglePu) TEST_F(HipExecutor, CanBindToPus) { - hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); + hip = gko::HipExecutor::create(0, gko::ReferenceExecutor::create()); std::vector bind_pus = {1, 3}; gko::machine_topology::get_instance()->bind_to_pus(bind_pus); @@ -131,7 +132,7 @@ TEST_F(HipExecutor, CanBindToPus) TEST_F(HipExecutor, CanBindToCores) { - hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); + hip = gko::HipExecutor::create(0, gko::ReferenceExecutor::create()); std::vector bind_cores = {1, 3}; gko::machine_topology::get_instance()->bind_to_cores(bind_cores); @@ -143,7 +144,7 @@ TEST_F(HipExecutor, CanBindToCores) TEST_F(HipExecutor, ClosestCpusIsPopulated) { - hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); + hip = gko::HipExecutor::create(0, gko::ReferenceExecutor::create()); auto close_cpus = hip->get_closest_pus(); if (close_cpus.size() == 0) { GTEST_SKIP(); @@ -155,7 +156,7 @@ TEST_F(HipExecutor, ClosestCpusIsPopulated) TEST_F(HipExecutor, KnowsItsNuma) { - hip = gko::HipExecutor::create(0, gko::OmpExecutor::create()); + hip = gko::HipExecutor::create(0, gko::ReferenceExecutor::create()); auto numa0 = hip->get_closest_numa(); auto close_cpus = hip->get_closest_pus(); if (close_cpus.size() == 0) { From e365d292f73b3e8423e25c39f9fdc107316ea9ed Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 10 Jul 2023 14:24:15 +0000 Subject: [PATCH 059/583] warn if using unsupported allocator --- cuda/base/memory.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp index 08c64c0ba05..f605d9135ea 100644 --- a/cuda/base/memory.cpp +++ b/cuda/base/memory.cpp @@ -121,7 +121,13 @@ void CudaAsyncAllocator::deallocate(void* ptr) #else // Fall back to regular allocation -CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {} +CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} +{ +#if GKO_VERBOSE_LEVEL >= 1 + std::cerr << "This version of CUDA does not support cudaMallocAsync, " + "please use CudaAllocator instead of CudaAsyncAllocator.\n"; +#endif +} void* CudaAsyncAllocator::allocate(size_type num_bytes) From d01e1a957a4d370cc02d9294cb685461373a4d35 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 10 Jul 2023 14:27:48 +0000 Subject: [PATCH 060/583] improve documentation Co-authored-by: Yuhsiang M. Tsai Co-authored-by: Pratik Nayak --- include/ginkgo/core/base/memory.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/ginkgo/core/base/memory.hpp b/include/ginkgo/core/base/memory.hpp index 1086c9aacb4..f421abf7da4 100644 --- a/include/ginkgo/core/base/memory.hpp +++ b/include/ginkgo/core/base/memory.hpp @@ -76,6 +76,7 @@ class CudaAllocatorBase : public Allocator { * * @param device_id the device ID the allocator will be used in. * @param stream the stream the allocator will be used with. + * * @return true if and only if the allocator can be used by CudaExecutor in * the given environment. */ @@ -100,7 +101,8 @@ class HipAllocatorBase : public Allocator { * * @param device_id the device ID the allocator will be used in. * @param stream the stream the allocator will be used with. 
- * @return true if and only if the allocator can be used by CudaExecutor in + * + * @return true if and only if the allocator can be used by HipExecutor in * the given environment. */ virtual bool check_environment(int device_id, From 1420f3e2d5fe1b76e8d92e82be39c54bd19c311f Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 19 Jul 2023 11:46:47 +0200 Subject: [PATCH 061/583] fix HIP requirements for stream-ordered allocation --- hip/base/memory.hip.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hip/base/memory.hip.cpp b/hip/base/memory.hip.cpp index 86ac31c3154..be795bb3397 100644 --- a/hip/base/memory.hip.cpp +++ b/hip/base/memory.hip.cpp @@ -97,7 +97,7 @@ void HipAllocator::deallocate(void* dev_ptr) } -#if HIP_VERSION_MAJOR >= 5 +#if HIP_VERSION >= 50200000 HipAsyncAllocator::HipAsyncAllocator(hipStream_t stream) : stream_{stream} {} @@ -121,7 +121,13 @@ void HipAsyncAllocator::deallocate(void* ptr) #else // Fall back to regular allocation -HipAsyncAllocator::HipAsyncAllocator(hipStream_t stream) : stream_{stream} {} +HipAsyncAllocator::HipAsyncAllocator(hipStream_t stream) : stream_{stream} +{ +#if GKO_VERBOSE_LEVEL >= 1 + std::cerr << "This version of HIP does not support hipMallocAsync, " + "please use HipAllocator instead of HipAsyncAllocator.\n"; +#endif +} void* HipAsyncAllocator::allocate(size_type num_bytes) From a3ab2a253024d257fa55f9f10f4b65926bfcf5f5 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 19 Jul 2023 12:13:05 +0200 Subject: [PATCH 062/583] use unified allocator in some HIP tests --- hip/test/base/hip_executor.hip.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp index e63543ef77c..42499384704 100644 --- a/hip/test/base/hip_executor.hip.cpp +++ b/hip/test/base/hip_executor.hip.cpp @@ -114,13 +114,14 @@ class HipExecutor : public ::testing::Test { gko::HipExecutor::get_num_devices() - 1, ref, std::make_shared(), other_stream.get()); hip3 = gko::HipExecutor::create( - 0, ref, std::make_shared(), stream.get()); + 0, ref, std::make_shared(0), + stream.get()); #else hip = gko::HipExecutor::create(0, ref); hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1, ref); - hip3 = gko::HipExecutor::create(0, ref, - std::make_shared()); + hip3 = gko::HipExecutor::create( + 0, ref, std::make_shared(0)); #endif } From 6cda4fc348710efa4e2d825e15367dacce1966b6 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 19 Jul 2023 13:52:49 +0200 Subject: [PATCH 063/583] resolve ambiguous symbol --- cuda/base/executor.cpp | 8 ++++---- hip/base/executor.hip.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index fd16815456a..f296fb9da86 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -60,8 +60,8 @@ namespace gko { #include "common/cuda_hip/base/executor.hpp.inc" -std::unique_ptr allocator_from_mode(int device_id, - allocation_mode mode) +std::unique_ptr cuda_allocator_from_mode( + int device_id, allocation_mode mode) { switch (mode) { case allocation_mode::device: @@ -82,8 +82,8 @@ std::shared_ptr CudaExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, cudaStream_t stream) { - return create(device_id, master, allocator_from_mode(device_id, alloc_mode), - stream); + return create(device_id, master, + cuda_allocator_from_mode(device_id, alloc_mode), stream); } diff --git 
a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index a89e765becb..8d175c0e424 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -56,8 +56,8 @@ namespace gko { #include "common/cuda_hip/base/executor.hpp.inc" -std::unique_ptr allocator_from_mode(int device_id, - allocation_mode mode) +std::unique_ptr hip_allocator_from_mode(int device_id, + allocation_mode mode) { switch (mode) { case allocation_mode::device: @@ -79,7 +79,7 @@ std::shared_ptr HipExecutor::create( allocation_mode alloc_mode, hipStream_t stream) { return create(device_id, std::move(master), - allocator_from_mode(device_id, alloc_mode), stream); + hip_allocator_from_mode(device_id, alloc_mode), stream); } From c64da5b092a3daebddad7430cf1779dcf4371087 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 27 Apr 2022 15:08:52 +0200 Subject: [PATCH 064/583] add partition dpcpp kernels --- dpcpp/distributed/partition_kernels.dp.cpp | 115 ++++++++++++++++++++- 1 file changed, 112 insertions(+), 3 deletions(-) diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 7d9210894e2..42cc0a72711 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -30,17 +30,86 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +// force-top: on +#include +#include +#include +// force-top: off + + #include "core/distributed/partition_kernels.hpp" +#include "common/unified/base/kernel_launch.hpp" +#include "core/components/fill_array_kernels.hpp" + + namespace gko { namespace kernels { namespace dpcpp { namespace partition { +namespace kernel { + + +template +void setup_sizes_ids_permutation( + std::shared_ptr exec, size_type num_ranges, + comm_index_type num_parts, const GlobalIndexType* range_offsets, + const comm_index_type* range_parts, Array& range_sizes, + Array& part_ids, Array& permutation) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto num_ranges, auto num_parts, + auto range_offsets, auto range_parts, auto range_sizes, + auto part_ids, auto permutation) { + if (i == 0) { + // set sentinel value at the end + part_ids[num_ranges] = num_parts; + } + range_sizes[i] = range_offsets[i + 1] - range_offsets[i]; + part_ids[i] = range_parts[i]; + permutation[i] = static_cast(i); + }, + num_ranges, num_ranges, num_parts, range_offsets, range_parts, + range_sizes.get_data(), part_ids.get_data(), permutation.get_data()); +} + + +template +void compute_part_sizes_and_starting_indices( + std::shared_ptr exec, size_type num_ranges, + const Array& range_sizes, + const Array& part_ids, + const Array& permutation, LocalIndexType* starting_indices, + LocalIndexType* part_sizes) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto grouped_starting_indices, + auto grouped_part_ids, auto orig_idxs, + auto starting_indices, auto part_sizes) { + auto prev_part = i > 0 ? grouped_part_ids[i - 1] + : invalid_index(); + auto cur_part = grouped_part_ids[i]; + auto next_part = + grouped_part_ids[i + 1]; // last element has to be num_parts + if (cur_part != next_part) { + part_sizes[cur_part] = grouped_starting_indices[i]; + } + // write result shifted by one entry to get exclusive prefix sum + starting_indices[orig_idxs[i]] = + prev_part == cur_part ? 
grouped_starting_indices[i - 1] + : LocalIndexType{}; + }, + num_ranges, range_sizes.get_const_data(), part_ids.get_const_data(), + permutation.get_const_data(), starting_indices, part_sizes); +} + + +} // namespace kernel -// TODO: wait until https://github.com/oneapi-src/oneDPL/pull/388 is release to -// implement it similar to cuda/hip template void build_starting_indices(std::shared_ptr exec, const GlobalIndexType* range_offsets, @@ -48,7 +117,47 @@ void build_starting_indices(std::shared_ptr exec, size_type num_ranges, comm_index_type num_parts, comm_index_type& num_empty_parts, LocalIndexType* starting_indices, - LocalIndexType* part_sizes) GKO_NOT_IMPLEMENTED; + LocalIndexType* part_sizes) +{ + if (num_ranges > 0) { + auto policy = + oneapi::dpl::execution::make_device_policy(*exec->get_queue()); + + Array range_sizes{exec, num_ranges}; + // num_parts sentinel at the end + Array tmp_part_ids{exec, num_ranges + 1}; + Array permutation{exec, num_ranges}; + // set part_sizes to 0 in case of empty parts + components::fill_array(exec, part_sizes, num_parts, + zero()); + + kernel::setup_sizes_ids_permutation( + exec, num_ranges, num_parts, range_offsets, range_parts, + range_sizes, tmp_part_ids, permutation); + + auto tmp_part_id_ptr = tmp_part_ids.get_data(); + auto range_sizes_ptr = range_sizes.get_data(); + auto sort_it = oneapi::dpl::make_zip_iterator( + tmp_part_id_ptr, range_sizes_ptr, permutation.get_data()); + // group range_sizes by part ID + oneapi::dpl::stable_sort(policy, sort_it, sort_it + num_ranges, + [](const auto t_a, const auto t_b) { + return std::get<0>(t_a) < std::get<0>(t_b); + }); + // compute inclusive prefix sum for each part + oneapi::dpl::inclusive_scan_by_segment( + policy, tmp_part_id_ptr, tmp_part_id_ptr + num_ranges, + range_sizes_ptr, range_sizes_ptr); + // write back the results + kernel::compute_part_sizes_and_starting_indices( + exec, num_ranges, range_sizes, tmp_part_ids, permutation, + starting_indices, part_sizes); + num_empty_parts = + oneapi::dpl::count(policy, part_sizes, part_sizes + num_parts, 0); + } else { + num_empty_parts = num_parts; + } +} GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_PARTITION_BUILD_STARTING_INDICES); From fe3ff935db618637df0e633560e9c0899d278c7b Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 7 Jul 2023 11:19:38 +0200 Subject: [PATCH 065/583] review updates: - adds helper to create oneDPL policy - re-enable distributed matrix test with dpcpp Co-authored-by: Tobias Ribizel --- dpcpp/base/device_matrix_data_kernels.dp.cpp | 13 ++--- dpcpp/base/onedpl.hpp | 61 ++++++++++++++++++++ dpcpp/distributed/partition_kernels.dp.cpp | 11 ++-- dpcpp/multigrid/pgm_kernels.dp.cpp | 10 ++-- test/mpi/CMakeLists.txt | 2 +- 5 files changed, 78 insertions(+), 19 deletions(-) create mode 100644 dpcpp/base/onedpl.hpp diff --git a/dpcpp/base/device_matrix_data_kernels.dp.cpp b/dpcpp/base/device_matrix_data_kernels.dp.cpp index 9d387ce7ecf..f8185d884c1 100644 --- a/dpcpp/base/device_matrix_data_kernels.dp.cpp +++ b/dpcpp/base/device_matrix_data_kernels.dp.cpp @@ -33,7 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // force-top: on // oneDPL needs to be first to avoid issues with libstdc++ TBB impl #include -#include // force-top: off @@ -43,6 +42,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "dpcpp/base/onedpl.hpp" + + namespace gko { namespace kernels { namespace dpcpp { @@ -56,8 +58,7 @@ void remove_zeros(std::shared_ptr exec, { using nonzero_type = matrix_data_entry; auto size = values.get_num_elems(); - auto policy = - oneapi::dpl::execution::make_device_policy(*exec->get_queue()); + auto policy = onedpl_policy(exec); auto nnz = std::count_if( policy, values.get_const_data(), values.get_const_data() + size, [](ValueType val) { return is_nonzero(val); }); @@ -96,8 +97,7 @@ void sum_duplicates(std::shared_ptr exec, size_type, if (size == 0) { return; } - auto policy = - oneapi::dpl::execution::make_device_policy(*exec->get_queue()); + auto policy = onedpl_policy(exec); auto in_loc_it = oneapi::dpl::make_zip_iterator(row_idxs.get_const_data(), col_idxs.get_const_data()); auto adj_in_loc_it = @@ -136,8 +136,7 @@ template void sort_row_major(std::shared_ptr exec, device_matrix_data& data) { - auto policy = - oneapi::dpl::execution::make_device_policy(*exec->get_queue()); + auto policy = onedpl_policy(exec); auto input_it = oneapi::dpl::make_zip_iterator( data.get_row_idxs(), data.get_col_idxs(), data.get_values()); std::sort(policy, input_it, input_it + data.get_num_elems(), diff --git a/dpcpp/base/onedpl.hpp b/dpcpp/base/onedpl.hpp new file mode 100644 index 00000000000..4af31d3e115 --- /dev/null +++ b/dpcpp/base/onedpl.hpp @@ -0,0 +1,61 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_DPCPP_BASE_ONEDPL_HPP_ +#define GKO_DPCPP_BASE_ONEDPL_HPP_ + + +// force-top: on +#include +// force-top: off + + +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +inline auto onedpl_policy(std::shared_ptr exec) +{ + return oneapi::dpl::execution::make_device_policy(*exec->get_queue()); +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_BASE_ONEDPL_HPP_ diff --git a/dpcpp/distributed/partition_kernels.dp.cpp b/dpcpp/distributed/partition_kernels.dp.cpp index 42cc0a72711..04b7ff215ed 100644 --- a/dpcpp/distributed/partition_kernels.dp.cpp +++ b/dpcpp/distributed/partition_kernels.dp.cpp @@ -32,7 +32,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // force-top: on #include -#include #include // force-top: off @@ -42,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common/unified/base/kernel_launch.hpp" #include "core/components/fill_array_kernels.hpp" +#include "dpcpp/base/onedpl.hpp" namespace gko { @@ -72,7 +72,7 @@ void setup_sizes_ids_permutation( permutation[i] = static_cast(i); }, num_ranges, num_ranges, num_parts, range_offsets, range_parts, - range_sizes.get_data(), part_ids.get_data(), permutation.get_data()); + range_sizes, part_ids, permutation); } @@ -102,8 +102,8 @@ void compute_part_sizes_and_starting_indices( prev_part == cur_part ? grouped_starting_indices[i - 1] : LocalIndexType{}; }, - num_ranges, range_sizes.get_const_data(), part_ids.get_const_data(), - permutation.get_const_data(), starting_indices, part_sizes); + num_ranges, range_sizes, part_ids, permutation, starting_indices, + part_sizes); } @@ -120,8 +120,7 @@ void build_starting_indices(std::shared_ptr exec, LocalIndexType* part_sizes) { if (num_ranges > 0) { - auto policy = - oneapi::dpl::execution::make_device_policy(*exec->get_queue()); + auto policy = onedpl_policy(exec); Array range_sizes{exec, num_ranges}; // num_parts sentinel at the end diff --git a/dpcpp/multigrid/pgm_kernels.dp.cpp b/dpcpp/multigrid/pgm_kernels.dp.cpp index 15bd22180c0..2234d8ffe38 100644 --- a/dpcpp/multigrid/pgm_kernels.dp.cpp +++ b/dpcpp/multigrid/pgm_kernels.dp.cpp @@ -33,7 +33,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // force-top: on // oneDPL needs to be first to avoid issues with libstdc++ TBB impl #include -#include // force-top: off @@ -48,6 +47,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "dpcpp/base/onedpl.hpp" + + namespace gko { namespace kernels { namespace dpcpp { @@ -63,8 +65,7 @@ template void sort_agg(std::shared_ptr exec, IndexType num, IndexType* row_idxs, IndexType* col_idxs) { - auto policy = - oneapi::dpl::execution::make_device_policy(*exec->get_queue()); + auto policy = onedpl_policy(exec); auto it = oneapi::dpl::make_zip_iterator(row_idxs, col_idxs); std::sort(policy, it, it + num, [](auto a, auto b) { return std::tie(std::get<0>(a), std::get<1>(a)) < @@ -79,8 +80,7 @@ template void sort_row_major(std::shared_ptr exec, size_type nnz, IndexType* row_idxs, IndexType* col_idxs, ValueType* vals) { - auto policy = - oneapi::dpl::execution::make_device_policy(*exec->get_queue()); + auto policy = onedpl_policy(exec); auto it = oneapi::dpl::make_zip_iterator(row_idxs, col_idxs, vals); // Because reduce_by_segment is not determinstic, so we do not need // stable_sort diff --git a/test/mpi/CMakeLists.txt b/test/mpi/CMakeLists.txt index 3d5e3cadd58..08050bde58f 100644 --- a/test/mpi/CMakeLists.txt +++ b/test/mpi/CMakeLists.txt @@ -1,4 +1,4 @@ -ginkgo_create_common_and_reference_test(matrix MPI_SIZE 3 DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_and_reference_test(matrix MPI_SIZE 3) ginkgo_create_common_and_reference_test(vector MPI_SIZE 3) add_subdirectory(preconditioner) From 20b7f8cc9ea21cd2a8b01b3d780dddace5b03144 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Mon, 12 Jun 2023 10:36:15 +0200 Subject: [PATCH 066/583] Fix some typos --- core/base/dispatch_helper.hpp | 4 ++-- core/base/iterator_factory.hpp | 2 +- core/base/mtx_io.cpp | 8 ++++---- core/base/types.hpp | 2 +- core/solver/gcr.cpp | 2 +- core/solver/multigrid.cpp | 4 ++-- core/test/base/mtx_io.cpp | 6 +++--- core/test/mpi/base/polymorphic_object.cpp | 2 +- core/test/solver/idr.cpp | 2 +- core/test/utils/assertions_test.cpp | 2 +- core/test/utils/fb_matrix_generator.hpp | 2 +- core/test/utils/matrix_generator.hpp | 2 +- examples/CMakeLists.txt | 2 +- .../custom-stopping-criterion.cpp | 2 +- .../distributed-solver/distributed-solver.cpp | 4 ++-- .../external-lib-interfacing.cpp | 2 +- .../doc/results.dox | 2 +- .../mixed-multigrid-preconditioned-solver.cpp | 2 +- .../mixed-multigrid-solver/doc/results.dox | 2 +- .../mixed-multigrid-solver.cpp | 2 +- .../doc/results.dox | 2 +- ...igrid-preconditioned-solver-customized.cpp | 2 +- .../doc/results.dox | 2 +- .../multigrid-preconditioned-solver.cpp | 2 +- include/ginkgo/core/base/composition.hpp | 4 ++-- include/ginkgo/core/base/exception.hpp | 6 +++--- include/ginkgo/core/base/executor.hpp | 12 +++++------ include/ginkgo/core/base/index_set.hpp | 2 +- include/ginkgo/core/base/lin_op.hpp | 2 +- include/ginkgo/core/base/machine_topology.hpp | 2 +- include/ginkgo/core/base/perturbation.hpp | 2 +- .../ginkgo/core/base/polymorphic_object.hpp | 4 ++-- include/ginkgo/core/base/range.hpp | 20 +++++++++---------- .../core/base/scoped_device_id_guard.hpp | 2 +- .../distributed/preconditioner/schwarz.hpp | 4 ++-- include/ginkgo/core/distributed/vector.hpp | 8 ++++---- include/ginkgo/core/factorization/par_ic.hpp | 2 +- include/ginkgo/core/factorization/par_ict.hpp | 2 +- include/ginkgo/core/factorization/par_ilu.hpp | 2 +- .../ginkgo/core/factorization/par_ilut.hpp | 2 +- include/ginkgo/core/log/logger.hpp | 4 ++-- include/ginkgo/core/log/profiler_hook.hpp | 4 ++-- include/ginkgo/core/matrix/coo.hpp | 2 +- include/ginkgo/core/matrix/dense.hpp | 8 ++++---- include/ginkgo/core/matrix/hybrid.hpp | 8 ++++---- include/ginkgo/core/preconditioner/isai.hpp 
| 2 +- include/ginkgo/core/preconditioner/jacobi.hpp | 6 +++--- include/ginkgo/core/reorder/rcm.hpp | 2 +- .../ginkgo/core/reorder/reordering_base.hpp | 2 +- include/ginkgo/core/solver/idr.hpp | 2 +- include/ginkgo/core/solver/ir.hpp | 4 ++-- include/ginkgo/core/solver/solver_base.hpp | 2 +- include/ginkgo/core/stop/criterion.hpp | 2 +- include/ginkgo/core/stop/stopping_status.hpp | 4 ++-- include/ginkgo/core/stop/time.hpp | 2 +- 55 files changed, 97 insertions(+), 97 deletions(-) diff --git a/core/base/dispatch_helper.hpp b/core/base/dispatch_helper.hpp index 155d5ef6c23..2226ffc6b6d 100644 --- a/core/base/dispatch_helper.hpp +++ b/core/base/dispatch_helper.hpp @@ -63,7 +63,7 @@ void run(T, Func, Args...) * run uses template to go through the list and select the valid * template and run it. * - * @tparam K the current type tried in the convertion + * @tparam K the current type tried in the conversion * @tparam ...Types the other types will be tried in the conversion if K fails * @tparam T the type of input object * @tparam Func the function will run if the object can be converted to K @@ -108,7 +108,7 @@ void run(T, Func, Args...) * * @tparam Base the Base class with one template * @tparam K the current template type of B. pointer of const Base is tried - * in the convertion. + * in the conversion. * @tparam ...Types the other types will be tried in the conversion if K fails * @tparam T the type of input object waiting converted * @tparam Func the function will run if the object can be converted to pointer diff --git a/core/base/iterator_factory.hpp b/core/base/iterator_factory.hpp index 6384d5bfbce..7ebbc510f74 100644 --- a/core/base/iterator_factory.hpp +++ b/core/base/iterator_factory.hpp @@ -84,7 +84,7 @@ class zip_iterator_reference template value_type cast_impl(std::index_sequence) const { - // gcc 5 throws error as using unintialized array + // gcc 5 throws error as using uninitialized array // std::tuple t = { 1, '2' }; is not allowed. // converting to 'std::tuple<...>' from initializer list would use // explicit constructor diff --git a/core/base/mtx_io.cpp b/core/base/mtx_io.cpp index d8604e95b5f..de4f6ec1e86 100644 --- a/core/base/mtx_io.cpp +++ b/core/base/mtx_io.cpp @@ -267,7 +267,7 @@ class mtx_io { /** * storage modifier hierarchy provides algorithms for handling storage - * modifiers (general, symetric, skew symetric, hermitian) and filling the + * modifiers (general, symmetric, skew symmetric, hermitian) and filling the * entire matrix from the stored parts */ struct storage_modifier { @@ -491,7 +491,7 @@ class mtx_io { * @param os The output stream to write to * @param data The matrix data to write * @param entry_writer The entry format to write in. - * @param modifier The strorage modifer + * @param modifier The storage modifier */ virtual void write_data(std::ostream& os, const matrix_data& data, @@ -554,7 +554,7 @@ class mtx_io { * @param os The output stream to write to * @param data The matrix data to write * @param entry_writer The entry format to write in. - * @param modifier The strorage modifer + * @param modifier The storage modifier */ void write_data(std::ostream& os, const matrix_data& data, @@ -623,7 +623,7 @@ class mtx_io { * @param os The output stream to write to * @param data The matrix data to write * @param entry_writer The entry format to write in. 
- * @param modifier The strorage modifer + * @param modifier The storage modifier */ void write_data(std::ostream& os, const matrix_data& data, diff --git a/core/base/types.hpp b/core/base/types.hpp index 5f90ed2cafe..39ca169d486 100644 --- a/core/base/types.hpp +++ b/core/base/types.hpp @@ -109,7 +109,7 @@ constexpr std::enable_if_t<(num_groups > current_shift + 1), int> shift( * * The usage will be the following * Set the method with bits Cfg = ConfigSet - * Encode the given infomation encoded = Cfg::encode(x_0, x_1, ..., x_k) + * Encode the given information encoded = Cfg::encode(x_0, x_1, ..., x_k) * Decode the specific position information x_t = Cfg::decode(encoded) * The encoded result will use 32 bits to record * rrrrr0..01....1...k..k, which 1/2/.../k means the bits store the information diff --git a/core/solver/gcr.cpp b/core/solver/gcr.cpp index e1df71491e5..4b767ad40ad 100644 --- a/core/solver/gcr.cpp +++ b/core/solver/gcr.cpp @@ -186,7 +186,7 @@ void Gcr::apply_dense_impl(const VectorType* dense_b, size_type restart_iter = 0; /* Memory movement summary for average iteration with krylov_dim d: - * (4d+22+4/d)n+(d+1+1/d) * values + matrix/preconditioner stroage + * (4d+22+4/d)n+(d+1+1/d) * values + matrix/preconditioner storage * 1x SpMV: 2n * values + storage * 1x Preconditioner: 2n * values + storage * 1x step 1 (scal, axpys) 6n diff --git a/core/solver/multigrid.cpp b/core/solver/multigrid.cpp index 7a521f5f53e..074fa95d848 100644 --- a/core/solver/multigrid.cpp +++ b/core/solver/multigrid.cpp @@ -182,8 +182,8 @@ namespace multigrid { /** - * The enum class is to combine the cycle infomation It's legal to use a binary - * or(|) operation to combine several properties. + * The enum class is to combine the cycle information It's legal to use a + * binary or(|) operation to combine several properties. */ enum class cycle_mode { /** diff --git a/core/test/base/mtx_io.cpp b/core/test/base/mtx_io.cpp index a1029bd9d12..a25f462556a 100644 --- a/core/test/base/mtx_io.cpp +++ b/core/test/base/mtx_io.cpp @@ -286,7 +286,7 @@ TEST(MtxReader, ReadsSparseRealMtx) } -TEST(MtxReader, ReadsSparseRealSymetricMtx) +TEST(MtxReader, ReadsSparseRealSymmetricMtx) { using tpl = gko::matrix_data::nonzero_type; std::istringstream iss( @@ -310,7 +310,7 @@ TEST(MtxReader, ReadsSparseRealSymetricMtx) } -TEST(MtxReader, ReadsSparseRealSkewSymetricMtx) +TEST(MtxReader, ReadsSparseRealSkewSymmetricMtx) { using tpl = gko::matrix_data::nonzero_type; std::istringstream iss( @@ -330,7 +330,7 @@ TEST(MtxReader, ReadsSparseRealSkewSymetricMtx) } -TEST(MtxReader, ReadsSparseRealSkewSymetricMtxWithExplicitDiagonal) +TEST(MtxReader, ReadsSparseRealSkewSymmetricMtxWithExplicitDiagonal) { using tpl = gko::matrix_data::nonzero_type; std::istringstream iss( diff --git a/core/test/mpi/base/polymorphic_object.cpp b/core/test/mpi/base/polymorphic_object.cpp index 88bcb756f4b..1cacc5d52f4 100644 --- a/core/test/mpi/base/polymorphic_object.cpp +++ b/core/test/mpi/base/polymorphic_object.cpp @@ -152,7 +152,7 @@ class EnableDistributedPolymorphicObject : public testing::Test { protected: std::shared_ptr ref{ gko::ReferenceExecutor::create()}; - // TDOD: We can't rely on Omp module being available in this test! + // TODO: We can't rely on Omp module being available in this test! 
std::shared_ptr omp{gko::OmpExecutor::create()}; gko::experimental::mpi::communicator comm{MPI_COMM_WORLD}; gko::experimental::mpi::communicator split_comm{comm.get(), comm.rank() < 2, diff --git a/core/test/solver/idr.cpp b/core/test/solver/idr.cpp index f9109acb69e..45511be8e1b 100644 --- a/core/test/solver/idr.cpp +++ b/core/test/solver/idr.cpp @@ -420,7 +420,7 @@ TYPED_TEST(Idr, CanSetComplexSubspaceAgain) auto solver = idr_factory->generate(this->mtx); - solver->set_complex_subpsace(false); + solver->set_complex_subspace(false); ASSERT_EQ(solver->get_complex_subspace(), false); } diff --git a/core/test/utils/assertions_test.cpp b/core/test/utils/assertions_test.cpp index 2e3cbefaaf6..029af45e076 100644 --- a/core/test/utils/assertions_test.cpp +++ b/core/test/utils/assertions_test.cpp @@ -98,7 +98,7 @@ class MatricesNear : public ::testing::Test { }; -TEST_F(MatricesNear, SuceedsIfSame) +TEST_F(MatricesNear, SucceedsIfSame) { ASSERT_PRED_FORMAT3(gko::test::assertions::matrices_near, mtx1.get(), mtx1.get(), 0.0); diff --git a/core/test/utils/fb_matrix_generator.hpp b/core/test/utils/fb_matrix_generator.hpp index 1c5c818757b..7c43b0905c1 100644 --- a/core/test/utils/fb_matrix_generator.hpp +++ b/core/test/utils/fb_matrix_generator.hpp @@ -129,7 +129,7 @@ std::unique_ptr generate_random_matrix_with_diag( * generated FBCSR matrix. * @param block_size Block size of output Fbcsr matrix. * @param row_diag_dominant If true, a row-diagonal-dominant Fbcsr matrix is - * generated. Note that in this case, the intput Csr + * generated. Note that in this case, the input Csr * matrix must have diagonal entries in all rows. * @param rand_engine Random number engine to use, such as * std::default_random_engine. diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index 23ab84cc491..6928c5424a5 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -553,7 +553,7 @@ std::unique_ptr generate_tridiag_matrix( /** * This computes an inverse of an tridiagonal Toeplitz matrix. * - * The compuation is based on the formula is from + * The computation is based on the formula is from * https://en.wikipedia.org/wiki/Tridiagonal_matrix#Inversion * * @param size the (square) size of the resulting matrix diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 41ed77d9002..33e3bab735a 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -59,7 +59,7 @@ find_package(Kokkos QUIET) if(Kokkos_FOUND) if(GINKGO_WITH_CCACHE) message(WARNING "The CMAKE_CXX_COMPILER_LAUNCHER is set due to " - "GINKGO_WITH_CCACHE=ON which is known to casue issues with CUDA enabled " + "GINKGO_WITH_CCACHE=ON which is known to cause issues with CUDA enabled " "Kokkos (https://github.com/kokkos/kokkos/issues/4821) including compilation " "failures. 
This can be prevented by setting GINKGO_WITH_CCACHE=OFF.") endif() diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp index e07f1bf92fb..800846cfbd9 100644 --- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp +++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp @@ -175,7 +175,7 @@ int main(int argc, char* argv[]) // executor where Ginkgo will perform the computation const auto exec = exec_map.at(executor_string)(); // throws if not valid - // Declare a user controled boolean for the iteration process + // Declare a user controlled boolean for the iteration process volatile bool stop_iteration_process{}; // Create a new a thread to launch the solver diff --git a/examples/distributed-solver/distributed-solver.cpp b/examples/distributed-solver/distributed-solver.cpp index 865a44b0643..123f93775f5 100644 --- a/examples/distributed-solver/distributed-solver.cpp +++ b/examples/distributed-solver/distributed-solver.cpp @@ -51,9 +51,9 @@ int main(int argc, char* argv[]) // done with the following helper construct that uses RAII to automate the // initialization and finalization. const gko::experimental::mpi::environment env(argc, argv); - // @sect3{Type Definitiions} + // @sect3{Type Definitions} // Define the needed types. In a parallel program we need to differentiate - // beweeen global and local indices, thus we have two index types. + // between global and local indices, thus we have two index types. using GlobalIndexType = gko::int64; using LocalIndexType = gko::int32; // The underlying value type. diff --git a/examples/external-lib-interfacing/external-lib-interfacing.cpp b/examples/external-lib-interfacing/external-lib-interfacing.cpp index 08b35923b30..1766af3001f 100644 --- a/examples/external-lib-interfacing/external-lib-interfacing.cpp +++ b/examples/external-lib-interfacing/external-lib-interfacing.cpp @@ -1324,7 +1324,7 @@ void GradientEstimation::estimate_cell( // set_thread_limit, the default value from the Intel Threading // Building Blocks (TBB) library is used. If the call to // set_thread_limit is omitted, the number of threads will be -// chosen by TBB indepently of DEAL_II_NUM_THREADS. +// chosen by TBB independently of DEAL_II_NUM_THREADS. 
int main() { try { diff --git a/examples/mixed-multigrid-preconditioned-solver/doc/results.dox b/examples/mixed-multigrid-preconditioned-solver/doc/results.dox index af922a27ebc..dccd3ccad93 100644 --- a/examples/mixed-multigrid-preconditioned-solver/doc/results.dox +++ b/examples/mixed-multigrid-preconditioned-solver/doc/results.dox @@ -14,7 +14,7 @@ Final residual norm sqrt(r^T r): CG iteration count: 39 CG generation time [ms]: 2.04293 CG execution time [ms]: 22.3874 -CG execution time per iteraion[ms]: 0.574036 +CG execution time per iteration[ms]: 0.574036 @endcode diff --git a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp index 9edd7ff29a1..cef918983e9 100644 --- a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp +++ b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp @@ -250,7 +250,7 @@ int main(int argc, char* argv[]) << static_cast(gen_time.count()) / 1000000.0 << std::endl; std::cout << "CG execution time [ms]: " << static_cast(time.count()) / 1000000.0 << std::endl; - std::cout << "CG execution time per iteraion[ms]: " + std::cout << "CG execution time per iteration[ms]: " << static_cast(time.count()) / 1000000.0 / logger->get_num_iterations() << std::endl; diff --git a/examples/mixed-multigrid-solver/doc/results.dox b/examples/mixed-multigrid-solver/doc/results.dox index 7cbaa772d18..045fe343743 100644 --- a/examples/mixed-multigrid-solver/doc/results.dox +++ b/examples/mixed-multigrid-solver/doc/results.dox @@ -14,7 +14,7 @@ Final residual norm sqrt(r^T r): Multigrid iteration count: 9 Multigrid generation time [ms]: 3.35361 Multigrid execution time [ms]: 10.048 -Multigrid execution time per iteraion[ms]: 1.11644 +Multigrid execution time per iteration[ms]: 1.11644 @endcode diff --git a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp index cbecbbbdc02..4241a74cdf2 100644 --- a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp +++ b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp @@ -232,7 +232,7 @@ int main(int argc, char* argv[]) << static_cast(gen_time.count()) / 1000000.0 << std::endl; std::cout << "Multigrid execution time [ms]: " << static_cast(time.count()) / 1000000.0 << std::endl; - std::cout << "Multigrid execution time per iteraion[ms]: " + std::cout << "Multigrid execution time per iteration[ms]: " << static_cast(time.count()) / 1000000.0 / logger->get_num_iterations() << std::endl; diff --git a/examples/multigrid-preconditioned-solver-customized/doc/results.dox b/examples/multigrid-preconditioned-solver-customized/doc/results.dox index c7ba90d2fbb..2135f715934 100644 --- a/examples/multigrid-preconditioned-solver-customized/doc/results.dox +++ b/examples/multigrid-preconditioned-solver-customized/doc/results.dox @@ -14,7 +14,7 @@ Final residual norm sqrt(r^T r): CG iteration count: 12 CG generation time [ms]: 1.41642 CG execution time [ms]: 6.59244 -CG execution time per iteraion[ms]: 0.54937 +CG execution time per iteration[ms]: 0.54937 @endcode diff --git a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp index a455ca2e8ed..f82a603d662 100644 --- 
a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp +++ b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp @@ -202,7 +202,7 @@ int main(int argc, char* argv[]) << static_cast(gen_time.count()) / 1000000.0 << std::endl; std::cout << "CG execution time [ms]: " << static_cast(time.count()) / 1000000.0 << std::endl; - std::cout << "CG execution time per iteraion[ms]: " + std::cout << "CG execution time per iteration[ms]: " << static_cast(time.count()) / 1000000.0 / logger->get_num_iterations() << std::endl; diff --git a/examples/multigrid-preconditioned-solver/doc/results.dox b/examples/multigrid-preconditioned-solver/doc/results.dox index af922a27ebc..dccd3ccad93 100644 --- a/examples/multigrid-preconditioned-solver/doc/results.dox +++ b/examples/multigrid-preconditioned-solver/doc/results.dox @@ -14,7 +14,7 @@ Final residual norm sqrt(r^T r): CG iteration count: 39 CG generation time [ms]: 2.04293 CG execution time [ms]: 22.3874 -CG execution time per iteraion[ms]: 0.574036 +CG execution time per iteration[ms]: 0.574036 @endcode diff --git a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp index 75c03259c67..b31b7906902 100644 --- a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp +++ b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp @@ -161,7 +161,7 @@ int main(int argc, char* argv[]) << static_cast(gen_time.count()) / 1000000.0 << std::endl; std::cout << "CG execution time [ms]: " << static_cast(time.count()) / 1000000.0 << std::endl; - std::cout << "CG execution time per iteraion[ms]: " + std::cout << "CG execution time per iteration[ms]: " << static_cast(time.count()) / 1000000.0 / logger->get_num_iterations() << std::endl; diff --git a/include/ginkgo/core/base/composition.hpp b/include/ginkgo/core/base/composition.hpp index 44c24b901b3..5091b4a439e 100644 --- a/include/ginkgo/core/base/composition.hpp +++ b/include/ginkgo/core/base/composition.hpp @@ -176,7 +176,7 @@ class Composition : public EnableLinOp>, * @tparam Rest types of trailing parameters * * @param oper the first operator - * @param rest remainging operators + * @param rest remaining operators */ template explicit Composition(std::shared_ptr oper, Rest&&... rest) @@ -217,7 +217,7 @@ class UseComposition { } /** - * Returns the operator at index-th poistion of composition + * Returns the operator at index-th position of composition * * @return index-th operator * diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp index ad39adf7a36..8b270ed7a98 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -73,7 +73,7 @@ namespace gko { * try { * auto y = apply(A, x); * } catch(Error e) { - * // an error occured, write the message to screen and exit + * // an error occurred, write the message to screen and exit * std::cout << e.what() << std::endl; * return -1; * } @@ -160,7 +160,7 @@ class NotSupported : public Error { * * @param file The name of the offending source file * @param line The source code line number where the error occurred - * @param func The name of the function where the error occured + * @param func The name of the function where the error occurred * @param obj_type The object type on which the requested operation cannot be performed. 
*/ @@ -513,7 +513,7 @@ class BadDimension : public Error { * Error that denotes issues between block sizes and matrix dimensions * * \tparam IndexType Type of index used by the linear algebra object that is - * incompatible with the requried block size. + * incompatible with the required block size. */ template class BlockSizeError : public Error { diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 4f476b9286d..456b69d3d7e 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -85,8 +85,8 @@ enum class log_propagation_mode { * host through the Unified memory model. * * `unified_host` allocates memory on the - * host and it is not available on devices which do not have concurrent acesses - * switched on, but this access can be explictly switched on, when necessary. + * host and it is not available on devices which do not have concurrent accesses + * switched on, but this access can be explicitly switched on, when necessary. */ enum class allocation_mode { device, unified_global, unified_host }; @@ -1606,7 +1606,7 @@ class CudaExecutor : public detail::ExecutorBase, } /** - * Get the major verion of compute capability. + * Get the major version of compute capability. */ int get_major_version() const noexcept { @@ -1614,7 +1614,7 @@ class CudaExecutor : public detail::ExecutorBase, } /** - * Get the minor verion of compute capability. + * Get the minor version of compute capability. */ int get_minor_version() const noexcept { @@ -1793,7 +1793,7 @@ class HipExecutor : public detail::ExecutorBase, } /** - * Get the major verion of compute capability. + * Get the major version of compute capability. */ int get_major_version() const noexcept { @@ -1801,7 +1801,7 @@ class HipExecutor : public detail::ExecutorBase, } /** - * Get the minor verion of compute capability. + * Get the minor version of compute capability. */ int get_minor_version() const noexcept { diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index 3594d837f88..281690b7807 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -360,7 +360,7 @@ class index_set { const bool is_sorted = false) const; /** - * This function allows the user obtain a decompresed global_indices array + * This function allows the user obtain a decompressed global_indices array * from the indices stored in the index set * * @return the decompressed set of indices. diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index c7043f4ae25..c06c43bbb6e 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -931,7 +931,7 @@ class EnableLinOp * template parameters to enable a subclass of LinOpFactory. * * @tparam ConcreteFactory the concrete factory which is being implemented - * [CRTP parmeter] + * [CRTP parameter] * @tparam ConcreteLinOp the concrete LinOp type which this factory produces, * needs to have a constructor which takes a * const ConcreteFactory *, and an diff --git a/include/ginkgo/core/base/machine_topology.hpp b/include/ginkgo/core/base/machine_topology.hpp index 4fa7c2f8e17..317a768fb8a 100644 --- a/include/ginkgo/core/base/machine_topology.hpp +++ b/include/ginkgo/core/base/machine_topology.hpp @@ -71,7 +71,7 @@ namespace gko { /** * The machine topology class represents the hierarchical topology of a machine, - * including NUMA nodes, cores and PCI Devices. 
Various infomation of the + * including NUMA nodes, cores and PCI Devices. Various information of the * machine are gathered with the help of the Hardware Locality library (hwloc). * * This class also provides functionalities to bind objects in the topology to diff --git a/include/ginkgo/core/base/perturbation.hpp b/include/ginkgo/core/base/perturbation.hpp index 4e9adc4e94e..e0378b8cec2 100644 --- a/include/ginkgo/core/base/perturbation.hpp +++ b/include/ginkgo/core/base/perturbation.hpp @@ -186,7 +186,7 @@ class Perturbation : public EnableLinOp>, cache_struct(const cache_struct& other) {} cache_struct& operator=(const cache_struct& other) { return *this; } - // allocate linops of cache. The dimenstion of `intermediate` is + // allocate linops of cache. The dimension of `intermediate` is // (the number of rows of projector, the number of columns of b). Others // are 1x1 scalar. void allocate(std::shared_ptr exec, dim<2> size) diff --git a/include/ginkgo/core/base/polymorphic_object.hpp b/include/ginkgo/core/base/polymorphic_object.hpp index 8d4c327ac33..da19a63d51d 100644 --- a/include/ginkgo/core/base/polymorphic_object.hpp +++ b/include/ginkgo/core/base/polymorphic_object.hpp @@ -59,7 +59,7 @@ namespace gko { * @note Most of the public methods of this class should not be overridden * directly, and are thus not virtual. Instead, there are equivalent * protected methods (ending in _impl) that should be - * overriden instead. This allows polymorphic objects to implement default + * overridden instead. This allows polymorphic objects to implement default * behavior around virtual methods (parameter checking, type casting). * * @see EnablePolymorphicObject if you wish to implement a concrete polymorphic @@ -657,7 +657,7 @@ std::shared_ptr copy_and_convert_to( * The mixin changes parameter and return types of appropriate public methods of * PolymorphicObject in the same way EnableAbstractPolymorphicObject does. * In addition, it also provides default implementations of PolymorphicObject's - * vritual methods by using the _executor default constructor_ and the + * virtual methods by using the _executor default constructor_ and the * assignment operator of ConcreteObject. Consequently, the following is a * minimal example of PolymorphicObject: * diff --git a/include/ginkgo/core/base/range.hpp b/include/ginkgo/core/base/range.hpp index ed8901075bd..29c7baba8d8 100644 --- a/include/ginkgo/core/base/range.hpp +++ b/include/ginkgo/core/base/range.hpp @@ -276,7 +276,7 @@ using head_t = typename head::type; * `x` an `y` are ranges, and `alpha` is a scalar. * Range operations are optimized for memory access, and the above code does not * allocate additional storage for intermediate ranges `alpha * x` - * or `aplha * x + y`. In fact, the entire computation is done during the + * or `alpha * x + y`. In fact, the entire computation is done during the * assignment, and the results of operations `+` and `*` only register the data, * and the types of operations that will be computed once the results are * needed. @@ -295,7 +295,7 @@ using head_t = typename head::type; * * __`mmul` is not a highly-optimized BLAS-3 version of the matrix * multiplication.__ The current design of ranges and accessors prevents that, - * so if you need a high-perfromance matrix multiplication, you should use one + * so if you need a high-performance matrix multiplication, you should use one * of the libraries that provide that, or implement your own * (you can use pointwise range operations to help simplify that). 
However, * range design might get improved in the future to allow efficient @@ -710,17 +710,17 @@ GKO_ENABLE_UNARY_RANGE_OPERATION(bitwise_not, operator~, // common unary functions GKO_ENABLE_UNARY_RANGE_OPERATION(zero_operation, zero, accessor::detail::zero_operation); -GKO_ENABLE_UNARY_RANGE_OPERATION(one_operaton, one, +GKO_ENABLE_UNARY_RANGE_OPERATION(one_operation, one, accessor::detail::one_operation); -GKO_ENABLE_UNARY_RANGE_OPERATION(abs_operaton, abs, +GKO_ENABLE_UNARY_RANGE_OPERATION(abs_operation, abs, accessor::detail::abs_operation); -GKO_ENABLE_UNARY_RANGE_OPERATION(real_operaton, real, +GKO_ENABLE_UNARY_RANGE_OPERATION(real_operation, real, accessor::detail::real_operation); -GKO_ENABLE_UNARY_RANGE_OPERATION(imag_operaton, imag, +GKO_ENABLE_UNARY_RANGE_OPERATION(imag_operation, imag, accessor::detail::imag_operation); -GKO_ENABLE_UNARY_RANGE_OPERATION(conj_operaton, conj, +GKO_ENABLE_UNARY_RANGE_OPERATION(conj_operation, conj, accessor::detail::conj_operation); -GKO_ENABLE_UNARY_RANGE_OPERATION(squared_norm_operaton, squared_norm, +GKO_ENABLE_UNARY_RANGE_OPERATION(squared_norm_operation, squared_norm, accessor::detail::squared_norm_operation); namespace accessor { @@ -961,9 +961,9 @@ GKO_ENABLE_BINARY_RANGE_OPERATION(right_shift, operator>>, accessor::detail::right_shift); // common binary functions -GKO_ENABLE_BINARY_RANGE_OPERATION(max_operaton, max, +GKO_ENABLE_BINARY_RANGE_OPERATION(max_operation, max, accessor::detail::max_operation); -GKO_ENABLE_BINARY_RANGE_OPERATION(min_operaton, min, +GKO_ENABLE_BINARY_RANGE_OPERATION(min_operation, min, accessor::detail::min_operation); diff --git a/include/ginkgo/core/base/scoped_device_id_guard.hpp b/include/ginkgo/core/base/scoped_device_id_guard.hpp index 52fccdd241c..6b236a6a37e 100644 --- a/include/ginkgo/core/base/scoped_device_id_guard.hpp +++ b/include/ginkgo/core/base/scoped_device_id_guard.hpp @@ -58,7 +58,7 @@ class generic_scoped_device_id_guard { public: generic_scoped_device_id_guard() = default; - // TODO: this should be a purely virtual funtion, but somehow that leads to + // TODO: this should be a purely virtual function, but somehow that leads to // linker errors virtual ~generic_scoped_device_id_guard() = default; diff --git a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp index 9016442df67..441bc63d22c 100644 --- a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp +++ b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp @@ -112,7 +112,7 @@ class Schwarz /** * Creates a Schwarz preconditioner from a matrix using a Schwarz::Factory. * - * @param factory the factory to use to create the preconditoner + * @param factory the factory to use to create the preconditioner * @param system_matrix the matrix this preconditioner should be created * from */ @@ -126,7 +126,7 @@ class Schwarz } /** - * Generates the preconditoner. + * Generates the preconditioner. */ void generate(std::shared_ptr system_matrix); diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 1ad0b171788..61ceab8e380 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -337,7 +337,7 @@ class Vector array& tmp) const; /** - * Computes the square of the column-wise Euclidian ($L^2$) norm of this + * Computes the square of the column-wise Euclidean ($L^2$) norm of this * (multi-)vector using a global reduction. 
* * @param result a Dense row vector, used to store the norm @@ -347,7 +347,7 @@ class Vector void compute_squared_norm2(ptr_param result) const; /** - * Computes the square of the column-wise Euclidian ($L^2$) norm of this + * Computes the square of the column-wise Euclidean ($L^2$) norm of this * (multi-)vector using a global reduction. * * @param result a Dense row vector, used to store the norm @@ -360,7 +360,7 @@ class Vector void compute_squared_norm2(ptr_param result, array& tmp) const; /** - * Computes the Euclidian (L^2) norm of this (multi-)vector using a global + * Computes the Euclidean (L^2) norm of this (multi-)vector using a global * reduction. * * @param result a Dense row matrix, used to store the norm @@ -370,7 +370,7 @@ class Vector void compute_norm2(ptr_param result) const; /** - * Computes the Euclidian (L^2) norm of this (multi-)vector using a global + * Computes the Euclidean (L^2) norm of this (multi-)vector using a global * reduction. * * @param result a Dense row matrix, used to store the norm diff --git a/include/ginkgo/core/factorization/par_ic.hpp b/include/ginkgo/core/factorization/par_ic.hpp index 365a431208a..2df350f31a2 100644 --- a/include/ginkgo/core/factorization/par_ic.hpp +++ b/include/ginkgo/core/factorization/par_ic.hpp @@ -130,7 +130,7 @@ class ParIc : public Composition { * The number of iterations the `compute` kernel will use when doing * the factorization. The default value `0` means `Auto`, so the * implementation decides on the actual value depending on the - * ressources that are available. + * resources that are available. */ size_type GKO_FACTORY_PARAMETER_SCALAR(iterations, 0); diff --git a/include/ginkgo/core/factorization/par_ict.hpp b/include/ginkgo/core/factorization/par_ict.hpp index a9b41f33d90..173136fa682 100644 --- a/include/ginkgo/core/factorization/par_ict.hpp +++ b/include/ginkgo/core/factorization/par_ict.hpp @@ -236,7 +236,7 @@ class ParIct : public Composition { * matrix_type * * @param system_matrix the source matrix used to generate the factors. - * @note: system_matrix must be convertable to a Csr + * @note: system_matrix must be convertible to a Csr * Matrix, otherwise, an exception is thrown. * @return A Composition, containing the incomplete LU factors for the * given system_matrix (first element is L, then L^T) diff --git a/include/ginkgo/core/factorization/par_ilu.hpp b/include/ginkgo/core/factorization/par_ilu.hpp index 539946befec..878721afbd5 100644 --- a/include/ginkgo/core/factorization/par_ilu.hpp +++ b/include/ginkgo/core/factorization/par_ilu.hpp @@ -128,7 +128,7 @@ class ParIlu : public Composition { * The number of iterations the `compute` kernel will use when doing * the factorization. The default value `0` means `Auto`, so the * implementation decides on the actual value depending on the - * ressources that are available. + * resources that are available. */ size_type GKO_FACTORY_PARAMETER_SCALAR(iterations, 0); diff --git a/include/ginkgo/core/factorization/par_ilut.hpp b/include/ginkgo/core/factorization/par_ilut.hpp index ba4ce7d1629..76f3789a44e 100644 --- a/include/ginkgo/core/factorization/par_ilut.hpp +++ b/include/ginkgo/core/factorization/par_ilut.hpp @@ -242,7 +242,7 @@ class ParIlut : public Composition { * while the dynamic type of U is u_matrix_type. * * @param system_matrix the source matrix used to generate the factors. - * @note: system_matrix must be convertable to a Csr + * @note: system_matrix must be convertible to a Csr * Matrix, otherwise, an exception is thrown. 
* @return A Composition, containing the incomplete LU factors for the * given system_matrix (first element is L, then U) diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index 0f22663347c..b700e1e703a 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -111,10 +111,10 @@ class Logger { * call only if the user activates this event through the mask. If the * event is activated, we rely on polymorphism and the virtual method * `on_##_event_name()` to either call the Logger class's function, - * which does nothing, or the overriden version in the derived class if + * which does nothing, or the overridden version in the derived class if * any. Therefore, to support a new event in any Logger (i.e. class * which derive from this class), the function `on_##_event_name()` - * should be overriden and implemented. + * should be overridden and implemented. * * @param _id the unique id of the event * diff --git a/include/ginkgo/core/log/profiler_hook.hpp b/include/ginkgo/core/log/profiler_hook.hpp index 9a26acd6ab0..6a9b00dfac7 100644 --- a/include/ginkgo/core/log/profiler_hook.hpp +++ b/include/ginkgo/core/log/profiler_hook.hpp @@ -298,7 +298,7 @@ class ProfilerHook : public Logger { std::vector children{}; }; - /** Recieves the results from ProfilerHook::create_summary(). */ + /** Receives the results from ProfilerHook::create_summary(). */ class SummaryWriter { public: virtual ~SummaryWriter() = default; @@ -313,7 +313,7 @@ class ProfilerHook : public Logger { std::chrono::nanoseconds overhead) = 0; }; - /** Recieves the results from ProfilerHook::create_nested_summary(). */ + /** Receives the results from ProfilerHook::create_nested_summary(). */ class NestedSummaryWriter { public: virtual ~NestedSummaryWriter() = default; diff --git a/include/ginkgo/core/matrix/coo.hpp b/include/ginkgo/core/matrix/coo.hpp index 9ccd02d48db..15662294607 100644 --- a/include/ginkgo/core/matrix/coo.hpp +++ b/include/ginkgo/core/matrix/coo.hpp @@ -63,7 +63,7 @@ class Hybrid; /** * COO stores a matrix in the coordinate matrix format. * - * The nonzero elements are stored in an array row-wise (but not neccessarily + * The nonzero elements are stored in an array row-wise (but not necessarily * sorted by column index within a row). Two extra arrays contain the row and * column indexes of each nonzero element of the matrix. * diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index a1e08d38c65..16bff356223 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -853,7 +853,7 @@ class Dense array& tmp) const; /** - * Computes the column-wise Euclidian (L^2) norm of this matrix. + * Computes the column-wise Euclidean (L^2) norm of this matrix. * * @param result a Dense row vector, used to store the norm * (the number of columns in the vector must match the number @@ -862,7 +862,7 @@ class Dense void compute_norm2(ptr_param result) const; /** - * Computes the column-wise Euclidian (L^2) norm of this matrix. + * Computes the column-wise Euclidean (L^2) norm of this matrix. * * @param result a Dense row vector, used to store the norm * (the number of columns in the vector must match the @@ -895,7 +895,7 @@ class Dense void compute_norm1(ptr_param result, array& tmp) const; /** - * Computes the square of the column-wise Euclidian (L^2) norm of this + * Computes the square of the column-wise Euclidean (L^2) norm of this * matrix. 
* * @param result a Dense row vector, used to store the norm @@ -905,7 +905,7 @@ class Dense void compute_squared_norm2(ptr_param result) const; /** - * Computes the square of the column-wise Euclidian (L^2) norm of this + * Computes the square of the column-wise Euclidean (L^2) norm of this * matrix. * * @param result a Dense row vector, used to store the norm diff --git a/include/ginkgo/core/matrix/hybrid.hpp b/include/ginkgo/core/matrix/hybrid.hpp index a923e7b9079..db65b57b6fb 100644 --- a/include/ginkgo/core/matrix/hybrid.hpp +++ b/include/ginkgo/core/matrix/hybrid.hpp @@ -279,7 +279,7 @@ class Hybrid /** * Get the percent setting * - * @retrun percent + * @return percent */ auto get_percentage() const { return percent_; } @@ -314,14 +314,14 @@ class Hybrid /** * Get the percent setting * - * @retrun percent + * @return percent */ auto get_percentage() const { return strategy_.get_percentage(); } /** * Get the ratio setting * - * @retrun ratio + * @return ratio */ auto get_ratio() const { return ratio_; } @@ -356,7 +356,7 @@ class Hybrid /** * Get the percent setting * - * @retrun percent + * @return percent */ auto get_percentage() const { return strategy_.get_percentage(); } diff --git a/include/ginkgo/core/preconditioner/isai.hpp b/include/ginkgo/core/preconditioner/isai.hpp index 7f03deae0a1..c5d5ddc6471 100644 --- a/include/ginkgo/core/preconditioner/isai.hpp +++ b/include/ginkgo/core/preconditioner/isai.hpp @@ -233,7 +233,7 @@ class Isai : public EnableLinOp>, /** * Creates an Isai preconditioner from a matrix using an Isai::Factory. * - * @param factory the factory to use to create the preconditoner + * @param factory the factory to use to create the preconditioner * @param system_matrix the matrix for which an ISAI is to be computed */ explicit Isai(const Factory* factory, diff --git a/include/ginkgo/core/preconditioner/jacobi.hpp b/include/ginkgo/core/preconditioner/jacobi.hpp index bf215082a85..f48d8e34c8c 100644 --- a/include/ginkgo/core/preconditioner/jacobi.hpp +++ b/include/ginkgo/core/preconditioner/jacobi.hpp @@ -546,7 +546,7 @@ class Jacobi : public EnableLinOp>, /** * Creates a Jacobi preconditioner from a matrix using a Jacobi::Factory. * - * @param factory the factory to use to create the preconditoner + * @param factory the factory to use to create the preconditioner * @param system_matrix the matrix this preconditioner should be created * from */ @@ -593,7 +593,7 @@ class Jacobi : public EnableLinOp>, max_block_stride = param_max_block_stride; if (this->get_executor() != this->get_executor()->get_master() && max_block_stride != default_block_stride) { - // only support the default value on the gpu devive + // only support the default value on the gpu device GKO_NOT_SUPPORTED(this); } } @@ -612,7 +612,7 @@ class Jacobi : public EnableLinOp>, } /** - * Generates the preconditoner. + * Generates the preconditioner. * * @param system_matrix the source matrix used to generate the * preconditioner diff --git a/include/ginkgo/core/reorder/rcm.hpp b/include/ginkgo/core/reorder/rcm.hpp index bb6e7986206..72ba6827f2b 100644 --- a/include/ginkgo/core/reorder/rcm.hpp +++ b/include/ginkgo/core/reorder/rcm.hpp @@ -77,7 +77,7 @@ enum class starting_strategy { minimum_degree, pseudo_peripheral }; * * There are two "starting strategies" currently available: minimum degree and * pseudo-peripheral. 
These strategies control how a starting vertex for a - * connected component is choosen, which is then renumbered as first vertex in + * connected component is chosen, which is then renumbered as first vertex in * the component, starting the algorithm from there. * In general, the bandwidths obtained by choosing a pseudo-peripheral vertex * are slightly smaller than those obtained from choosing a vertex of minimum diff --git a/include/ginkgo/core/reorder/reordering_base.hpp b/include/ginkgo/core/reorder/reordering_base.hpp index 8cfb4c10c48..e0b80adb4cd 100644 --- a/include/ginkgo/core/reorder/reordering_base.hpp +++ b/include/ginkgo/core/reorder/reordering_base.hpp @@ -113,7 +113,7 @@ using ReorderingBaseFactory = * template parameters to enable a subclass of ReorderingBaseFactory. * * @tparam ConcreteFactory the concrete factory which is being implemented - * [CRTP parmeter] + * [CRTP parameter] * @tparam ConcreteReorderingBase the concrete ReorderingBase type which this * factory produces, needs to have a constructor which takes a const * ConcreteFactory *, and a const ReorderingBaseArgs * as parameters. diff --git a/include/ginkgo/core/solver/idr.hpp b/include/ginkgo/core/solver/idr.hpp index 7ad152f6808..fde0bc67157 100644 --- a/include/ginkgo/core/solver/idr.hpp +++ b/include/ginkgo/core/solver/idr.hpp @@ -163,7 +163,7 @@ class Idr * * @param other the new complex_subspace parameter */ - void set_complex_subpsace(const bool other) + void set_complex_subspace(const bool other) { parameters_.complex_subspace = other; } diff --git a/include/ginkgo/core/solver/ir.hpp b/include/ginkgo/core/solver/ir.hpp index 251924b70ff..c5c69c1fb67 100644 --- a/include/ginkgo/core/solver/ir.hpp +++ b/include/ginkgo/core/solver/ir.hpp @@ -306,7 +306,7 @@ struct workspace_traits> { * limited stop criterion(iterations and relacation_factor). * * @param factory the shared pointer of factory - * @param iteration the maximum number of iteraion, which default is 1 + * @param iteration the maximum number of iteration, which default is 1 * @param relaxation_factor the relaxation factor for Richardson * * @return the pointer of Ir(Richardson) @@ -329,7 +329,7 @@ auto build_smoother(std::shared_ptr factory, * limited stop criterion(iterations and relacation_factor). * * @param solver the shared pointer of solver - * @param iteration the maximum number of iteraion, which default is 1 + * @param iteration the maximum number of iteration, which default is 1 * @param relaxation_factor the relaxation factor for Richardson * * @return the pointer of Ir(Richardson) diff --git a/include/ginkgo/core/solver/solver_base.hpp b/include/ginkgo/core/solver/solver_base.hpp index 6687a6df82e..53909337554 100644 --- a/include/ginkgo/core/solver/solver_base.hpp +++ b/include/ginkgo/core/solver/solver_base.hpp @@ -244,7 +244,7 @@ class EnableApplyWithInitialGuess : public ApplyWithInitialGuess { self(), alpha, b, beta, x); } - // TODO: should we provide the defaule implementation? + // TODO: should we provide the default implementation? /** * The class should override this method and must modify the input vectors * according to the initial_guess_mode diff --git a/include/ginkgo/core/stop/criterion.hpp b/include/ginkgo/core/stop/criterion.hpp index e094cc90206..1a52da3efae 100644 --- a/include/ginkgo/core/stop/criterion.hpp +++ b/include/ginkgo/core/stop/criterion.hpp @@ -259,7 +259,7 @@ using CriterionFactory = AbstractFactory; * template parameters to enable a subclass of CriterionFactory. 
* * @tparam ConcreteFactory the concrete factory which is being implemented - * [CRTP parmeter] + * [CRTP parameter] * @tparam ConcreteCriterion the concrete Criterion type which this factory * produces, needs to have a constructor which takes * a const ConcreteFactory *, and a diff --git a/include/ginkgo/core/stop/stopping_status.hpp b/include/ginkgo/core/stop/stopping_status.hpp index c644e1977df..ee7d7890cf4 100644 --- a/include/ginkgo/core/stop/stopping_status.hpp +++ b/include/ginkgo/core/stop/stopping_status.hpp @@ -96,7 +96,7 @@ class stopping_status { GKO_ATTRIBUTES GKO_INLINE void reset() noexcept { data_ = uint8{0}; } /** - * Call if a stop occured due to a hard limit (and convergence was not + * Call if a stop occurred due to a hard limit (and convergence was not * reached). * @param id id of the stopping criteria. * @param set_finalized Controls if the current version should count as @@ -114,7 +114,7 @@ class stopping_status { } /** - * Call if convergence occured. + * Call if convergence occurred. * @param id id of the stopping criteria. * @param set_finalized Controls if the current version should count as * finalized (set to true) or not (set to false). diff --git a/include/ginkgo/core/stop/time.hpp b/include/ginkgo/core/stop/time.hpp index d1a752c5042..3d39b1de082 100644 --- a/include/ginkgo/core/stop/time.hpp +++ b/include/ginkgo/core/stop/time.hpp @@ -45,7 +45,7 @@ namespace stop { /** * The Time class is a stopping criterion which stops the iteration process - * after a certain amout of time has passed. + * after a certain amount of time has passed. * * @ingroup stop */ From 8612d9ca09051e52a333f60ad401668469e60bef Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Mon, 12 Jun 2023 12:39:50 +0200 Subject: [PATCH 067/583] Manual typo fix --- CHANGELOG.md | 48 +++++++++---------- CMakeLists.txt | 2 +- CONTRIBUTING.md | 2 +- _typos.toml | 10 ++++ accessor/accessor_helper.hpp | 2 +- accessor/row_major.hpp | 2 +- accessor/utils.hpp | 2 +- benchmark/CMakeLists.txt | 4 +- benchmark/tools/mtx_to_binary.cpp | 4 +- benchmark/utils/formats.hpp | 4 +- benchmark/utils/general.hpp | 2 +- cmake/CTestScript.cmake | 2 +- cmake/Modules/CudaArchitectureSelector.cmake | 2 +- cmake/hip.cmake | 2 +- cmake/information_helpers.cmake | 2 +- common/cuda_hip/base/executor.hpp.inc | 2 +- .../cuda_hip/components/segment_scan.hpp.inc | 2 +- common/cuda_hip/matrix/csr_kernels.hpp.inc | 2 +- common/cuda_hip/multigrid/pgm_kernels.hpp.inc | 4 +- common/unified/multigrid/pgm_kernels.cpp | 6 +-- core/solver/multigrid.cpp | 2 +- cuda/components/cooperative_groups.cuh | 2 +- cuda/solver/common_trs_kernels.cuh | 2 +- dev_tools/oneapi/convert_source.sh | 8 ++-- dev_tools/scripts/format_header.sh | 6 +-- devices/reference/dummy.cpp | 2 +- doc/examples/examples.hpp.in | 2 +- dpcpp/base/executor.dp.cpp | 2 +- dpcpp/base/helper.hpp | 2 +- dpcpp/components/segment_scan.dp.hpp | 2 +- dpcpp/components/thread_ids.dp.hpp | 4 +- dpcpp/multigrid/pgm_kernels.dp.cpp | 5 +- .../ginkgo/core/base/polymorphic_object.hpp | 4 +- include/ginkgo/core/matrix/dense.hpp | 2 +- include/ginkgo/core/solver/solver_base.hpp | 3 +- omp/reorder/rcm_kernels.cpp | 6 +-- reference/reorder/rcm_kernels.cpp | 2 +- reference/test/matrix/csr_kernels.cpp | 2 +- .../test/matrix/sparsity_csr_kernels.cpp | 2 +- reference/test/preconditioner/ilu.cpp | 2 +- reference/test/stop/residual_norm_kernels.cpp | 6 +-- 41 files changed, 92 insertions(+), 82 deletions(-) create mode 100644 _typos.toml diff --git a/CHANGELOG.md b/CHANGELOG.md index 
34d53363898..e5728ef2cc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -215,7 +215,7 @@ Supported systems and requirements: + Add reduce_add for arrays ([#831](https://github.com/ginkgo-project/ginkgo/pull/831)) + Add utility to simplify Dense View creation from an existing Dense vector ([#1136](https://github.com/ginkgo-project/ginkgo/pull/1136)). + Add a custom transpose implementation for Fbcsr and Csr transpose for unsupported vendor types ([#1123](https://github.com/ginkgo-project/ginkgo/pull/1123)) -+ Make IDR random initilization deterministic ([#1116](https://github.com/ginkgo-project/ginkgo/pull/1116)) ++ Make IDR random initialization deterministic ([#1116](https://github.com/ginkgo-project/ginkgo/pull/1116)) + Move the algorithm choice for triangular solvers from Csr::strategy_type to a factory parameter ([#1088](https://github.com/ginkgo-project/ginkgo/pull/1088)) + Update CUDA archCoresPerSM ([#1175](https://github.com/ginkgo-project/ginkgo/pull/1116)) + Add kernels for Csr sparsity pattern lookup ([#994](https://github.com/ginkgo-project/ginkgo/pull/994)) @@ -620,7 +620,7 @@ page](https://github.com/ginkgo-project/ginkgo/wiki/Known-Issues). ### Additions -+ Upper and lower triangular solvers ([#327](https://github.com/ginkgo-project/ginkgo/issues/327), [#336](https://github.com/ginkgo-project/ginkgo/issues/336), [#341](https://github.com/ginkgo-project/ginkgo/issues/341), [#342](https://github.com/ginkgo-project/ginkgo/issues/342)) ++ Upper and lower triangular solvers ([#327](https://github.com/ginkgo-project/ginkgo/issues/327), [#336](https://github.com/ginkgo-project/ginkgo/issues/336), [#341](https://github.com/ginkgo-project/ginkgo/issues/341), [#342](https://github.com/ginkgo-project/ginkgo/issues/342)) + New factorization support in Ginkgo, and addition of the ParILU algorithm ([#305](https://github.com/ginkgo-project/ginkgo/issues/305), [#315](https://github.com/ginkgo-project/ginkgo/issues/315), [#319](https://github.com/ginkgo-project/ginkgo/issues/319), [#324](https://github.com/ginkgo-project/ginkgo/issues/324)) + New ILU preconditioner ([#348](https://github.com/ginkgo-project/ginkgo/issues/348), [#353](https://github.com/ginkgo-project/ginkgo/issues/353)) @@ -632,7 +632,7 @@ page](https://github.com/ginkgo-project/ginkgo/wiki/Known-Issues). 
+ Allow benchmarking CuSPARSE spmv formats through Ginkgo's benchmarks ([#303](https://github.com/ginkgo-project/ginkgo/issues/303)) + New benchmark for sparse matrix format conversions ([#312](https://github.com/ginkgo-project/ginkgo/issues/312)[#317](https://github.com/ginkgo-project/ginkgo/issues/317)) + Add conversions between CSR and Hybrid formats ([#302](https://github.com/ginkgo-project/ginkgo/issues/302), [#310](https://github.com/ginkgo-project/ginkgo/issues/310)) -+ Support for sorting rows in the CSR format by column idices ([#322](https://github.com/ginkgo-project/ginkgo/issues/322)) ++ Support for sorting rows in the CSR format by column indices ([#322](https://github.com/ginkgo-project/ginkgo/issues/322)) + Addition of a CUDA COO SpMM kernel for improved performance ([#345](https://github.com/ginkgo-project/ginkgo/issues/345)) + Addition of a LinOp to handle perturbations of the form (identity + scalar * basis * projector) ([#334](https://github.com/ginkgo-project/ginkgo/issues/334)) @@ -845,35 +845,35 @@ About Ginkgo 1.0.0 is brought to you by: -**Karlsruhe Institute of Technology**, Germany -**Universitat Jaume I**, Spain -**University of Tennessee, Knoxville**, US +**Karlsruhe Institute of Technology**, Germany +**Universitat Jaume I**, Spain +**University of Tennessee, Knoxville**, US These universities, along with various project grants, supported the development team and provided resources needed for the development of Ginkgo. Ginkgo 1.0.0 contains contributions from: -**Hartwig Anzt**, Karlsruhe Institute of Technology -**Yenchen Chen**, National Taiwan University -**Terry Cojean**, Karlsruhe Institute of Technology -**Goran Flegar**, Universitat Jaume I -**Fritz Göbel**, Karlsruhe Institute of Technology -**Thomas Grützmacher**, Karlsruhe Institute of Technology -**Pratik Nayak**, Karlsruhe Institue of Technologgy -**Tobias Ribizel**, Karlsruhe Institute of Technology -**Yuhsiang Tsai**, National Taiwan University +**Hartwig Anzt**, Karlsruhe Institute of Technology +**Yenchen Chen**, National Taiwan University +**Terry Cojean**, Karlsruhe Institute of Technology +**Goran Flegar**, Universitat Jaume I +**Fritz Göbel**, Karlsruhe Institute of Technology +**Thomas Grützmacher**, Karlsruhe Institute of Technology +**Pratik Nayak**, Karlsruhe Institute of Technology +**Tobias Ribizel**, Karlsruhe Institute of Technology +**Yuhsiang Tsai**, National Taiwan University Supporting materials are provided by the following individuals: -**David Rogers** - the Ginkgo logo -**Frithjof Fleischhammer** - the Ginkgo website +**David Rogers** - the Ginkgo logo +**Frithjof Fleischhammer** - the Ginkgo website The development team is grateful to the following individuals for discussions and comments: - -**Erik Boman** -**Jelena Držaić** -**Mike Heroux** -**Mark Hoemmen** -**Timo Heister** -**Jens Saak** + +**Erik Boman** +**Jelena Držaić** +**Mike Heroux** +**Mark Hoemmen** +**Timo Heister** +**Jens Saak** diff --git a/CMakeLists.txt b/CMakeLists.txt index df6f0ffb89a..6351ce98bfa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,7 +69,7 @@ endif() set(GINKGO_CUDA_COMPILER_FLAGS "" CACHE STRING "Set the required NVCC compiler flags, mainly used for warnings. Current default is an empty string") set(GINKGO_CUDA_ARCHITECTURES "Auto" CACHE STRING - "A list of target NVIDIA GPU achitectures. See README.md for more detail.") + "A list of target NVIDIA GPU architectures. 
See README.md for more detail.") option(GINKGO_CUDA_DEFAULT_HOST_COMPILER "Tell Ginkgo to not automatically set the CUDA host compiler" OFF) # the details of fine/coarse grain memory and unsafe atomic are available https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#floating-point-fp-atomic-operations-and-coarse-fine-grained-memory-allocations option(GINKGO_HIP_AMD_UNSAFE_ATOMIC "Compiler uses unsafe floating point atomic (only for AMD GPU and ROCM >= 5). Default is ON because we use hipMalloc, which is always on coarse grain. Must turn off when allocating memory on fine grain" ON) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1dd6f412876..8e2f3990aca 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -312,7 +312,7 @@ Thus, contributors should be aware of the following rules for blank lines: However, simply calling function `f` from function `g` does not imply that `f` and `g` are "related". 2. Statements within structures / classes are separated with 1 blank line. - There are no blank lines betweeen the first / last statement in the + There are no blank lines between the first / last statement in the structure / class. 1. _exception_: there is no blank line between an access modifier (`private`, `protected`, `public`) and the following statement. _example_: diff --git a/_typos.toml b/_typos.toml new file mode 100644 index 00000000000..5ba4cd4f662 --- /dev/null +++ b/_typos.toml @@ -0,0 +1,10 @@ +[files] +extend-exclude = ["third_party/*", "*.svg"] + +[default.extend-words] +dout = "dout" +nd = "nd" +tht = "tht" +automatical = "automatical" +strat = "strat" +entrie = "entrie" diff --git a/accessor/accessor_helper.hpp b/accessor/accessor_helper.hpp index 5ee536d28db..5b80f4e13d8 100644 --- a/accessor/accessor_helper.hpp +++ b/accessor/accessor_helper.hpp @@ -78,7 +78,7 @@ struct row_major_helper_s { const std::array 1 ? total_dim - 1 : 0)>& stride, IndexType first, Indices&&... idxs) { - // The ASSERT size check must NOT be indexed with `dim_idx` directy, + // The ASSERT size check must NOT be indexed with `dim_idx` directly, // otherwise, it leads to a linker error. The reason is likely that // `std::array::operator[](const size_type &)` uses a // reference. Since `dim_idx` is constexpr (and not defined in a diff --git a/accessor/row_major.hpp b/accessor/row_major.hpp index 757110f4912..9026cef2116 100644 --- a/accessor/row_major.hpp +++ b/accessor/row_major.hpp @@ -55,7 +55,7 @@ namespace acc { * constructor parameters for this class to the range (it will forward it to * this class). * - * @warning For backward compatability reasons, a specialization is provided + * @warning For backward compatibility reasons, a specialization is provided * for dimensionality == 2. * * @tparam ValueType type of values this accessor returns diff --git a/accessor/utils.hpp b/accessor/utils.hpp index e692138ee4d..dfe30188f83 100644 --- a/accessor/utils.hpp +++ b/accessor/utils.hpp @@ -243,7 +243,7 @@ to_arithmetic_type(const Ref& ref) * @internal * Struct used for testing if an implicit cast is present. The constructor only * takes an OutType, so any argument of a type that is not implicitly - * convertable to OutType is incompatible. + * convertible to OutType is incompatible. 
*/ template struct test_for_implicit_cast { diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 434474fd336..f12dbad7f19 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -33,7 +33,7 @@ function(ginkgo_benchmark_hipsparse_linops type def) # use Thrust C++ device just for compilation, we don't use thrust::complex in the benchmarks target_compile_definitions(hipsparse_linops_${type} PUBLIC -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CPP) target_include_directories(hipsparse_linops_${type} SYSTEM PRIVATE - ${HSA_HEADER} ${HIP_INCLUDE_DIRS} + ${HAS_HEADER} ${HIP_INCLUDE_DIRS} ${HIPBLAS_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS}) target_link_libraries(hipsparse_linops_${type} Ginkgo::ginkgo ${HIPSPARSE_LIBRARIES}) endfunction() @@ -133,7 +133,7 @@ if (GINKGO_BUILD_HIP) add_library(hip_timer utils/hip_timer.hip.cpp) EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) set_target_properties(hip_timer PROPERTIES COMPILE_FLAGS ${HIP_CXX_FLAGS}) - target_include_directories(hip_timer SYSTEM PRIVATE ${HSA_HEADER} ${HIP_INCLUDE_DIRS}) + target_include_directories(hip_timer SYSTEM PRIVATE ${HAS_HEADER} ${HIP_INCLUDE_DIRS}) target_link_libraries(hip_timer ginkgo) endif() diff --git a/benchmark/tools/mtx_to_binary.cpp b/benchmark/tools/mtx_to_binary.cpp index 487687ff605..1d2f4f94e02 100644 --- a/benchmark/tools/mtx_to_binary.cpp +++ b/benchmark/tools/mtx_to_binary.cpp @@ -61,8 +61,8 @@ void process(const char* input, const char* output, bool validate) } } if (validate) { - std::ifstream ois(output, std::ios_base::in | std::ios_base::binary); - auto data2 = gko::read_binary_raw(ois); + std::ifstream is(output, std::ios_base::in | std::ios_base::binary); + auto data2 = gko::read_binary_raw(is); std::cerr << "Comparing against previously read data\n"; if (data.size != data2.size) { throw GKO_STREAM_ERROR("Mismatching sizes!"); diff --git a/benchmark/utils/formats.hpp b/benchmark/utils/formats.hpp index deecc4b530c..6b024b16d1c 100644 --- a/benchmark/utils/formats.hpp +++ b/benchmark/utils/formats.hpp @@ -78,8 +78,8 @@ std::string format_description = " Irregular Sparse Matrices.\n" "csr: Compressed Sparse Row storage. Ginkgo implementation with\n" " automatic strategy.\n" - "csrc: Ginkgo's CSR implementation with automatic stategy.\n" - "csri: Ginkgo's CSR implementation with inbalance strategy.\n" + "csrc: Ginkgo's CSR implementation with automatic strategy.\n" + "csri: Ginkgo's CSR implementation with imbalance strategy.\n" "csrm: Ginkgo's CSR implementation with merge_path strategy.\n" "csrs: Ginkgo's CSR implementation with sparselib strategy.\n" "ell: Ellpack format according to Bell and Garland: Efficient Sparse\n" diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 19c71b74a1a..5c6d849fe36 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -179,7 +179,7 @@ void initialize_argument_parsing(int* argc, char** argv[], std::string& header, } /** - * Print general benchmark informations using the common available parameters + * Print general benchmark information using the common available parameters * * @param extra describes benchmark specific extra parameters to output */ diff --git a/cmake/CTestScript.cmake b/cmake/CTestScript.cmake index 61d53b0442a..81ff86625d1 100644 --- a/cmake/CTestScript.cmake +++ b/cmake/CTestScript.cmake @@ -4,7 +4,7 @@ # # Runs our tests through CTest, with support for Coverage or memory checking. 
# -# This script provides a full CTest run whith result submission to Ginkgo's +# This script provides a full CTest run with result submission to Ginkgo's # CDash dashboard. The supported runs are: # + With or without coverage, requires the gcov tool. # + With or without address sanitizers. diff --git a/cmake/Modules/CudaArchitectureSelector.cmake b/cmake/Modules/CudaArchitectureSelector.cmake index 63e8c767446..1838ed4b932 100644 --- a/cmake/Modules/CudaArchitectureSelector.cmake +++ b/cmake/Modules/CudaArchitectureSelector.cmake @@ -119,7 +119,7 @@ # identifiers in this list will be removed from the list specified by the # ``ARCHITECTURES`` list. A warning will be printed for each removed entry. # The list also supports aggregates ``All``, ``Auto`` and GPU generation names -# wich have the same meaning as in the ``ARCHITECTURES'' specification list. +# which have the same meaning as in the ``ARCHITECTURES'' specification list. if(NOT DEFINED CMAKE_CUDA_COMPILER) diff --git a/cmake/hip.cmake b/cmake/hip.cmake index 1b9aa0e8723..5b7a268c7b6 100644 --- a/cmake/hip.cmake +++ b/cmake/hip.cmake @@ -197,7 +197,7 @@ if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") # Remove false positive CUDA warnings when calling one() and zero() list(APPEND GINKGO_HIP_NVCC_ADDITIONAL_FLAGS --expt-relaxed-constexpr --expt-extended-lambda) - if (GINKGO_HIP_PLATFROM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}" + if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}" AND CMAKE_CUDA_COMPILER_VERSION MATCHES "9.2" AND CMAKE_CUDA_HOST_COMPILER MATCHES ".*clang.*" ) ginkgo_extract_clang_version(${CMAKE_CUDA_HOST_COMPILER} GINKGO_CUDA_HOST_CLANG_VERSION) diff --git a/cmake/information_helpers.cmake b/cmake/information_helpers.cmake index 8bed7320caa..9a6a4481bf5 100644 --- a/cmake/information_helpers.cmake +++ b/cmake/information_helpers.cmake @@ -103,7 +103,7 @@ macro(ginkgo_interface_information) get_target_property(GINKGO_INTERFACE_LINK_LIBRARIES ginkgo INTERFACE_LINK_LIBRARIES) ginkgo_interface_libraries_recursively("${GINKGO_INTERFACE_LINK_LIBRARIES}") # Format and store the interface libraries found - # remove duplicates on the reversed list to keep the dependecy in the end of list. + # remove duplicates on the reversed list to keep the dependency in the end of list. list(REVERSE GINKGO_INTERFACE_LIBS_FOUND) list(REMOVE_DUPLICATES GINKGO_INTERFACE_LIBS_FOUND) list(REVERSE GINKGO_INTERFACE_LIBS_FOUND) diff --git a/common/cuda_hip/base/executor.hpp.inc b/common/cuda_hip/base/executor.hpp.inc index 7e71a3e24c0..ad641ecea5b 100644 --- a/common/cuda_hip/base/executor.hpp.inc +++ b/common/cuda_hip/base/executor.hpp.inc @@ -40,7 +40,7 @@ inline int convert_sm_ver_to_cores(int major, int minor) // Defines for GPU Architecture types (using the SM version to determine // the # of cores per SM typedef struct { - int SM; // 0xMm (hexidecimal notation), M = SM Major version, + int SM; // 0xMm (hexadecimal notation), M = SM Major version, // and m = SM minor version int Cores; } sSMtoCores; diff --git a/common/cuda_hip/components/segment_scan.hpp.inc b/common/cuda_hip/components/segment_scan.hpp.inc index 947c2c3afd7..584f44b6415 100644 --- a/common/cuda_hip/components/segment_scan.hpp.inc +++ b/common/cuda_hip/components/segment_scan.hpp.inc @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /** * @internal * - * Compute a segement scan using add operation (+) of a subwarp. Each segment + * Compute a segment scan using add operation (+) of a subwarp. 
Each segment * performs suffix sum. Works on the source array and returns whether the thread * is the first element of its segment with same `ind`. */ diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index e73dfde00fb..1fca1ee7215 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -606,7 +606,7 @@ __global__ __launch_bounds__(default_block_size) void spgeam( } // advance by the number of merged elements // in theory, we would need to mask by `valid`, but this - // would only be false somwhere in the last iteration, where + // would only be false somewhere in the last iteration, where // we don't need the value of c_begin afterwards, anyways. c_begin += popcnt(~prev_equal_mask & lanemask_full); return true; diff --git a/common/cuda_hip/multigrid/pgm_kernels.hpp.inc b/common/cuda_hip/multigrid/pgm_kernels.hpp.inc index 30cce92b8de..d8b6c4786b0 100644 --- a/common/cuda_hip/multigrid/pgm_kernels.hpp.inc +++ b/common/cuda_hip/multigrid/pgm_kernels.hpp.inc @@ -51,9 +51,9 @@ void sort_row_major(std::shared_ptr<const DefaultExecutor> exec, size_type nnz, using device_value_type = device_member_type<ValueType>; auto vals_it = reinterpret_cast<device_value_type*>(vals); auto it = thrust::make_zip_iterator(thrust::make_tuple(row_idxs, col_idxs)); - // Because reduce_by_key is not determinstic, so we do not need + // Because reduce_by_key is not deterministic, so we do not need // stable_sort_by_key - // TODO: If we have determinstic reduce_by_key, it should be + // TODO: If we have deterministic reduce_by_key, it should be // stable_sort_by_key thrust::sort_by_key(thrust_policy(exec), it, it + nnz, vals_it); } diff --git a/common/unified/multigrid/pgm_kernels.cpp b/common/unified/multigrid/pgm_kernels.cpp index 5836486f2a6..a61b32dacbd 100644 --- a/common/unified/multigrid/pgm_kernels.cpp +++ b/common/unified/multigrid/pgm_kernels.cpp @@ -135,7 +135,7 @@ void map_row(std::shared_ptr<const DefaultExecutor> exec, exec, [] GKO_KERNEL(auto tidx, auto fine_row_ptrs, auto agg, auto row_idxs) { const auto coarse_row = agg[tidx]; - // TODO: when it is neccessary, it can use warp per row to improve. + // TODO: when it is necessary, it can use warp per row to improve. for (auto i = fine_row_ptrs[tidx]; i < fine_row_ptrs[tidx + 1]; i++) { row_idxs[i] = coarse_row; @@ -232,7 +232,7 @@ void find_strongest_neighbor( // all neighbor is agg, connect to the strongest agg // Also, no others will use this item as their // strongest_neighbor because they are already aggregated. 
Thus, - // it is determinstic behavior + // it is deterministic behavior agg[row] = agg[strongest_agg]; } else if (strongest_unagg != -1) { // set the strongest neighbor in the unagg group @@ -260,7 +260,7 @@ void assign_to_exist_agg(std::shared_ptr exec, { const auto num = agg.get_num_elems(); if (intermediate_agg.get_num_elems() > 0) { - // determinstic kernel + // deterministic kernel run_kernel( exec, [] GKO_KERNEL(auto row, auto row_ptrs, auto col_idxs, diff --git a/core/solver/multigrid.cpp b/core/solver/multigrid.cpp index 074fa95d848..303106fa4f6 100644 --- a/core/solver/multigrid.cpp +++ b/core/solver/multigrid.cpp @@ -197,7 +197,7 @@ enum class cycle_mode { first_of_cycle = 2, /** - * current procees is the end one of the cycle + * current process is the end one of the cycle */ end_of_cycle = 4 }; diff --git a/cuda/components/cooperative_groups.cuh b/cuda/components/cooperative_groups.cuh index 93db80f2c31..db59a47658d 100644 --- a/cuda/components/cooperative_groups.cuh +++ b/cuda/components/cooperative_groups.cuh @@ -399,7 +399,7 @@ using cooperative_groups::thread_block_tile; // public API: // void sync() const // unsigned thread_rank() const -// usigned size() const +// unsigned size() const // T shfl(T, int) // T shfl_up(T, unsigned) // T shfl_down(T, unsigned) diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh index a8b134cebf2..bfdb4a5f854 100644 --- a/cuda/solver/common_trs_kernels.cuh +++ b/cuda/solver/common_trs_kernels.cuh @@ -445,7 +445,7 @@ __global__ void sptrsv_naive_caching_kernel( store(x_s, self_shid, r); x[row * x_stride + rhs] = r; - // This check to ensure no infinte loops happen. + // This check to ensure no infinite loops happen. if (is_nan(r)) { store(x_s, self_shid, zero()); x[row * x_stride + rhs] = zero(); diff --git a/dev_tools/oneapi/convert_source.sh b/dev_tools/oneapi/convert_source.sh index f6983dd500a..7aabca6f17d 100755 --- a/dev_tools/oneapi/convert_source.sh +++ b/dev_tools/oneapi/convert_source.sh @@ -3,7 +3,7 @@ # convert_source.sh converts cuda (and c++ code) to dpcpp code with ginkgo design. # Usage: -# EnvironementSet ./dev_tools/oneapi/convert_source.sh +# EnvironmentSet ./dev_tools/oneapi/convert_source.sh # can be .hpp/.cpp/.cu/.cuh # the following are parameters set by environment variables @@ -18,7 +18,7 @@ # ROOT_BUILD_DIR: the complete path for build folder. The default is "${ROOT_DIR}/${BUILD_DIR}" # GTEST_HEADER_DIR: the gtest header folder. The default is "${ROOT_BUILD_DIR}/_deps/googletest-src/googletest/include" # CLANG_FORMAT: the clang-format exec. The default is "clang-format" -# VERBOSE: if it is set as 1, script will ouput the path information +# VERBOSE: if it is set as 1, script will output the path information CURRENT_DIR="$( pwd )" cd "$( dirname "${BASH_SOURCE[0]}" )" SCRIPT_DIR="$( pwd )" @@ -33,7 +33,7 @@ GTEST_HEADER_DIR="${GTEST_HEADER_DIR:="${ROOT_BUILD_DIR}/_deps/googletest-src/go CLANG_FORMAT=${CLANG_FORMAT:="clang-format"} if [[ "${VERBOSE}" == 1 ]]; then echo "#####################" - echo "# Enviroment Setting:" + echo "# Environment Setting:" echo "CURRENT_DIR ${CURRENT_DIR}" echo "SCRIPT_DIR ${SCRIPT_DIR}" echo "ROOT_DIR ${ROOT_DIR}" @@ -262,7 +262,7 @@ rm "${OUTPUT_FOLDER}/${OUTPUT_FILE}" rm "${OUTPUT_FOLDER}/${OUTPUT_FILE}.dp.cpp" # Call DPCT -echo "# Call DPCT on the previosly generated file." +echo "# Call DPCT on the previously generated file." 
echo "############################################" dpct --extra-arg="-std=c++14" --extra-arg="-I ${ROOT_DIR}" --extra-arg="-I ${ROOT_DIR}/include" --extra-arg="-I ${ROOT_BUILD_DIR}/include" --extra-arg="-I ${ROOT_DIR}/dev_tools/oneapi" --extra-arg="-I ${GTEST_HEADER_DIR}" --cuda-include-path="${CUDA_HEADER_DIR}" --format-range=none ${OUTPUT_FILE} --suppress-warnings=1049 --out-root=${OUTPUT_FOLDER} echo "############################################" diff --git a/dev_tools/scripts/format_header.sh b/dev_tools/scripts/format_header.sh index a501b6f97d2..2437a03d623 100755 --- a/dev_tools/scripts/format_header.sh +++ b/dev_tools/scripts/format_header.sh @@ -266,7 +266,7 @@ while IFS='' read -r line || [ -n "$line" ]; do echo "${line}" >> "${CONTENT}" SKIP="false" if [[ "${line}" =~ $START_BLOCK_REX ]]; then - # keep everythin in #if block and /* block + # keep everything in #if block and /* block IN_BLOCK=$((IN_BLOCK+1)) if [ -z "${ALARM}" ]; then ALARM="set" @@ -291,13 +291,13 @@ if [ "${ALARM}" = "true" ]; then echo "Warning $1: sorting is probably incorrect" fi -# Wrtie license +# Write license echo "/*${GINKGO_LICENSE_BEACON}" > "$1" cat LICENSE >> "$1" echo "${GINKGO_LICENSE_BEACON}*/" >> "$1" echo "" >> "$1" -# Wrtie the definition of header according to path +# Write the definition of header according to path if [ -n "${IFNDEF}" ] && [ -n "${DEFINE}" ]; then IFNDEF="#ifndef ${HEADER_DEF}" DEFINE="#define ${HEADER_DEF}" diff --git a/devices/reference/dummy.cpp b/devices/reference/dummy.cpp index 210666655f7..6ab5dde07f3 100644 --- a/devices/reference/dummy.cpp +++ b/devices/reference/dummy.cpp @@ -31,4 +31,4 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ // Remove this file once there is at least one source file in -// ginkgo_referece_device +// ginkgo_reference_device diff --git a/doc/examples/examples.hpp.in b/doc/examples/examples.hpp.in index 5e685e2aa7b..a75ac59f186 100644 --- a/doc/examples/examples.hpp.in +++ b/doc/examples/examples.hpp.in @@ -212,7 +212,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * @ref heat_equation * Solving a 2D heat equation and showing matrix assembly, vector - * initalization and solver setup in a more complex setting with + * initialization and solver setup in a more complex setting with * output visualization. * * diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index c2015c8664c..3d01e271f15 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -103,7 +103,7 @@ void DpcppExecutor::populate_exec_info(const machine_topology* mach_topo) void DpcppExecutor::raw_free(void* ptr) const noexcept { - // the free function may syncronize excution or not, which depends on + // the free function may synchronize execution or not, which depends on // implementation or backend, so it is not guaranteed. // TODO: maybe a light wait implementation? try { diff --git a/dpcpp/base/helper.hpp b/dpcpp/base/helper.hpp index 714f5a0d37a..b38b6c1ef8b 100644 --- a/dpcpp/base/helper.hpp +++ b/dpcpp/base/helper.hpp @@ -203,7 +203,7 @@ bool validate(sycl::queue* queue, unsigned workgroup_size, * get_first_cfg will return the first valid config by validate function from * given config array. 
* - * @tparam IterArr the iteratable array type + * @tparam IterArr the iterable array type * @tparam Validate the validate function type * * @param arr the config array diff --git a/dpcpp/components/segment_scan.dp.hpp b/dpcpp/components/segment_scan.dp.hpp index ba0d9577fe3..b73ae12e9b3 100644 --- a/dpcpp/components/segment_scan.dp.hpp +++ b/dpcpp/components/segment_scan.dp.hpp @@ -50,7 +50,7 @@ namespace dpcpp { /** * @internal * - * Compute a segement scan using add operation (+) of a subgroup_size. Each + * Compute a segment scan using add operation (+) of a subgroup_size. Each * segment performs suffix sum. Works on the source array and returns whether * the thread is the first element of its segment with same `ind`. */ diff --git a/dpcpp/components/thread_ids.dp.hpp b/dpcpp/components/thread_ids.dp.hpp index 2792e2307e4..e689e9f14ba 100644 --- a/dpcpp/components/thread_ids.dp.hpp +++ b/dpcpp/components/thread_ids.dp.hpp @@ -238,7 +238,7 @@ __dpct_inline__ size_type get_thread_id(sycl::nd_item<3> item_ct1) * * Returns the global ID of the thread in the given index type. * This function assumes one-dimensional thread and block indexing in cuda - * sense. It uses the third position infomation to get the information. + * sense. It uses the third position information to get the information. * * @return the global ID of the thread in the given index type. * @@ -258,7 +258,7 @@ __dpct_inline__ IndexType get_thread_id_flat(sycl::nd_item<3> item_ct1) * * Returns the total number of threads in the given index type. * This function assumes one-dimensional thread and block indexing in cuda - * sense. It uses the third position infomation to get the information. + * sense. It uses the third position information to get the information. * * @return the total number of threads in the given index type. * diff --git a/dpcpp/multigrid/pgm_kernels.dp.cpp b/dpcpp/multigrid/pgm_kernels.dp.cpp index 2234d8ffe38..b404b1c10ab 100644 --- a/dpcpp/multigrid/pgm_kernels.dp.cpp +++ b/dpcpp/multigrid/pgm_kernels.dp.cpp @@ -82,9 +82,10 @@ void sort_row_major(std::shared_ptr exec, size_type nnz, { auto policy = onedpl_policy(exec); auto it = oneapi::dpl::make_zip_iterator(row_idxs, col_idxs, vals); - // Because reduce_by_segment is not determinstic, so we do not need + // Because reduce_by_segment is not deterministic, so we do not need + // stable_sort + // TODO: If we have deterministic reduce_by_segment, it should be // stable_sort - // TODO: If we have determinstic reduce_by_segment, it should be stable_sort std::sort(policy, it, it + nnz, [](auto a, auto b) { return std::tie(std::get<0>(a), std::get<1>(a)) < std::tie(std::get<0>(b), std::get<1>(b)); diff --git a/include/ginkgo/core/base/polymorphic_object.hpp b/include/ginkgo/core/base/polymorphic_object.hpp index da19a63d51d..fe5fdeae258 100644 --- a/include/ginkgo/core/base/polymorphic_object.hpp +++ b/include/ginkgo/core/base/polymorphic_object.hpp @@ -59,8 +59,8 @@ namespace gko { * @note Most of the public methods of this class should not be overridden * directly, and are thus not virtual. Instead, there are equivalent * protected methods (ending in _impl) that should be - * overridden instead. This allows polymorphic objects to implement default - * behavior around virtual methods (parameter checking, type casting). + * overridden instead. This allows polymorphic objects to implement + * default behavior around virtual methods (parameter checking, type casting). 
* * @see EnablePolymorphicObject if you wish to implement a concrete polymorphic * object and have sensible defaults generated automatically. diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 16bff356223..ae738d49b93 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -792,7 +792,7 @@ class Dense void add_scaled(ptr_param<const LinOp> alpha, ptr_param<const LinOp> b); /** - * Subtracts `b` scaled by `alpha` fron the matrix (aka: BLAS axpy). + * Subtracts `b` scaled by `alpha` from the matrix (aka: BLAS axpy). * * @param alpha If alpha is 1x1 Dense matrix, b is scaled * by alpha. If it is a Dense row vector of values, diff --git a/include/ginkgo/core/solver/solver_base.hpp b/include/ginkgo/core/solver/solver_base.hpp index 53909337554..ca2b5cee1b1 100644 --- a/include/ginkgo/core/solver/solver_base.hpp +++ b/include/ginkgo/core/solver/solver_base.hpp @@ -537,8 +537,7 @@ class // clang-format off [[deprecated("This class will be replaced by the template-less detail::SolverBaseLinOp in a future release")]] SolverBase // clang-format on - : public detail::SolverBaseLinOp -{ + : public detail::SolverBaseLinOp { public: using detail::SolverBaseLinOp::SolverBaseLinOp; diff --git a/omp/reorder/rcm_kernels.cpp b/omp/reorder/rcm_kernels.cpp index 579770b9b2f..c0042224b3c 100644 --- a/omp/reorder/rcm_kernels.cpp +++ b/omp/reorder/rcm_kernels.cpp @@ -99,7 +99,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); // This constant controls how many nodes can be dequeued from the // UbfsLinearQueue at once at most. Increasing it reduces lock contention and -// "unneccesary work", but disturbs queue ordering, generating extra work. constexpr int32 chunk_bound = 512; @@ -633,7 +633,7 @@ vector compute_level_offsets(std::shared_ptr exec, } -// Signal value to which the entire permutation is intialized. +// Signal value to which the entire permutation is initialized. // Threads spin on this value, until it is replaced by another value, // written by another thread. constexpr int32 perm_untouched = -1; @@ -697,7 +697,7 @@ void write_permutation(std::shared_ptr<const OmpExecutor> exec, // Will not be written by multiple threads, but can be read // while written. This is only necessary to guarantee the - // abscence of reads-while-writes. + // absence of reads-while-writes. IndexType neighbour_level; #pragma omp atomic read neighbour_level = levels[neighbour]; diff --git a/reference/reorder/rcm_kernels.cpp b/reference/reorder/rcm_kernels.cpp index be14aeb557d..9ad8de1d170 100644 --- a/reference/reorder/rcm_kernels.cpp +++ b/reference/reorder/rcm_kernels.cpp @@ -255,7 +255,7 @@ void get_permutation(std::shared_ptr<const ReferenceExecutor> exec, ++tail_offset; } - // Get the neigbours of the next vertex, + // Get the neighbours of the next vertex, // check if they have already been visited, // if no, insert them to sort. 
auto prev_head_offset = head_offset; diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 0c5ac3bde53..d56201ade02 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -123,7 +123,7 @@ class Csr : public ::testing::Test { value_type* v = m->get_values(); index_type* c = m->get_col_idxs(); index_type* r = m->get_row_ptrs(); - // It keeps an explict zero + // It keeps an explicit zero /* * 1 3 2 * {0} 5 0 diff --git a/reference/test/matrix/sparsity_csr_kernels.cpp b/reference/test/matrix/sparsity_csr_kernels.cpp index 4d356ffd828..dde558d27fd 100644 --- a/reference/test/matrix/sparsity_csr_kernels.cpp +++ b/reference/test/matrix/sparsity_csr_kernels.cpp @@ -96,7 +96,7 @@ class SparsityCsr : public ::testing::Test { { index_type* c = m->get_col_idxs(); index_type* r = m->get_row_ptrs(); - // It keeps an explict zero + // It keeps an explicit zero /* * 1 1 1 * {0} 1 0 diff --git a/reference/test/preconditioner/ilu.cpp b/reference/test/preconditioner/ilu.cpp index 3511de4f011..ce3ea72725f 100644 --- a/reference/test/preconditioner/ilu.cpp +++ b/reference/test/preconditioner/ilu.cpp @@ -301,7 +301,7 @@ TYPED_TEST(Ilu, SolvesCustomTypeDefaultFactorySingleRhs) ilu_prec_type::build().on(this->exec)->generate(this->mtx); preconditioner->apply(b, x); - // Since it uses Bicgstab with default parmeters, the result will not be + // Since it uses Bicgstab with default parameters, the result will not be // accurate GKO_ASSERT_MTX_NEAR(x, l({-0.125, 0.25, 1.0}), 1e-1); } diff --git a/reference/test/stop/residual_norm_kernels.cpp b/reference/test/stop/residual_norm_kernels.cpp index cc8d145231e..1c18fbb895d 100644 --- a/reference/test/stop/residual_norm_kernels.cpp +++ b/reference/test/stop/residual_norm_kernels.cpp @@ -240,7 +240,7 @@ TYPED_TEST(ResidualNorm, WaitsTillResidualGoal) } -TYPED_TEST(ResidualNorm, SelfCalulatesThrowWithoutMatrix) +TYPED_TEST(ResidualNorm, SelfCalculatesThrowWithoutMatrix) { using Mtx = typename TestFixture::Mtx; using NormVector = typename TestFixture::NormVector; @@ -297,7 +297,7 @@ TYPED_TEST(ResidualNorm, SelfCalulatesThrowWithoutMatrix) } -TYPED_TEST(ResidualNorm, RelativeSelfCalulatesThrowWithoutRhs) +TYPED_TEST(ResidualNorm, RelativeSelfCalculatesThrowWithoutRhs) { // only relative residual norm allows generation without rhs. 
using Mtx = typename TestFixture::Mtx; @@ -322,7 +322,7 @@ TYPED_TEST(ResidualNorm, RelativeSelfCalulatesThrowWithoutRhs) } -TYPED_TEST(ResidualNorm, SelfCalulatesAndWaitsTillResidualGoal) +TYPED_TEST(ResidualNorm, SelfCalculatesAndWaitsTillResidualGoal) { using Mtx = typename TestFixture::Mtx; using NormVector = typename TestFixture::NormVector; From 42cc35a1cf2a83dbe03d72919aff5ba53d5a99c1 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Mon, 12 Jun 2023 10:42:59 +0000 Subject: [PATCH 068/583] Format files Co-authored-by: Gregor Olenik --- include/ginkgo/core/solver/solver_base.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/ginkgo/core/solver/solver_base.hpp b/include/ginkgo/core/solver/solver_base.hpp index ca2b5cee1b1..53909337554 100644 --- a/include/ginkgo/core/solver/solver_base.hpp +++ b/include/ginkgo/core/solver/solver_base.hpp @@ -537,7 +537,8 @@ class // clang-format off [[deprecated("This class will be replaced by the template-less detail::SolverBaseLinOp in a future release")]] SolverBase // clang-format on - : public detail::SolverBaseLinOp { + : public detail::SolverBaseLinOp +{ public: using detail::SolverBaseLinOp::SolverBaseLinOp; From 06aa470f563200e1aca95e879dae629b62834134 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Mon, 12 Jun 2023 12:51:38 +0200 Subject: [PATCH 069/583] Deprecate set_complex_subpsace --- _typos.toml | 1 + include/ginkgo/core/solver/idr.hpp | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/_typos.toml b/_typos.toml index 5ba4cd4f662..e3229ce22f5 100644 --- a/_typos.toml +++ b/_typos.toml @@ -8,3 +8,4 @@ tht = "tht" automatical = "automatical" strat = "strat" entrie = "entrie" +set_complex_subpsace = "set_complex_subpsace" diff --git a/include/ginkgo/core/solver/idr.hpp b/include/ginkgo/core/solver/idr.hpp index fde0bc67157..da41e6229a5 100644 --- a/include/ginkgo/core/solver/idr.hpp +++ b/include/ginkgo/core/solver/idr.hpp @@ -158,6 +158,18 @@ class Idr */ bool get_complex_subspace() const { return parameters_.complex_subspace; } + /** + * Sets the complex_subspace parameter of the solver. + * + * @param other the new complex_subspace parameter + * @deprecated Please use set_complex_subspace instead + */ + [[deprecated("Use set_complex_subspace instead")]] + void set_complex_subpsace(const bool other) + { + set_complex_subspace(other); + } + /** * Sets the complex_subspace parameter of the solver. 
* From 4a3f722500bdb2bb6ffb339d5e0e4798dc9afbec Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Mon, 12 Jun 2023 10:59:45 +0000 Subject: [PATCH 070/583] Format files Co-authored-by: Gregor Olenik --- include/ginkgo/core/solver/idr.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ginkgo/core/solver/idr.hpp b/include/ginkgo/core/solver/idr.hpp index da41e6229a5..61020eb4f05 100644 --- a/include/ginkgo/core/solver/idr.hpp +++ b/include/ginkgo/core/solver/idr.hpp @@ -164,8 +164,8 @@ class Idr * @param other the new complex_subspace parameter * @deprecated Please use set_complex_subspace instead */ - [[deprecated("Use set_complex_subspace instead")]] - void set_complex_subpsace(const bool other) + [[deprecated("Use set_complex_subspace instead")]] void + set_complex_subpsace(const bool other) { set_complex_subspace(other); } From d95f7e812cab65694064e91f50970df2c48f94ab Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Mon, 12 Jun 2023 13:16:26 +0200 Subject: [PATCH 071/583] add spell check workflow --- .github/workflows/spell_check.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .github/workflows/spell_check.yml diff --git a/.github/workflows/spell_check.yml b/.github/workflows/spell_check.yml new file mode 100644 index 00000000000..9aaeca57385 --- /dev/null +++ b/.github/workflows/spell_check.yml @@ -0,0 +1,12 @@ +name: Test GitHub Action +on: [pull_request] + +jobs: + run: + name: Spell Check with Typos + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Check for typos + uses: crate-ci/typos@master + From 102ac329cd5bb572ff55699a2bc09531a554404a Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Mon, 12 Jun 2023 13:35:18 +0200 Subject: [PATCH 072/583] add exception for idr --- _typos.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/_typos.toml b/_typos.toml index e3229ce22f5..b3456752b8f 100644 --- a/_typos.toml +++ b/_typos.toml @@ -8,4 +8,7 @@ tht = "tht" automatical = "automatical" strat = "strat" entrie = "entrie" -set_complex_subpsace = "set_complex_subpsace" +agregate = "agregate" # since that script name is already in ginkgo-data repo + +[default.extend-identifiers] +set_complex_subpsace = "set_complex_subpsace" # remove when deprecated function is gone From e3651aaea1b6a3c00070b194ee093c88e7992d0c Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Mon, 12 Jun 2023 14:18:36 +0200 Subject: [PATCH 073/583] revert HAS_HEADER to HSA_HEADER --- _typos.toml | 1 + benchmark/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/_typos.toml b/_typos.toml index b3456752b8f..e261242ffa3 100644 --- a/_typos.toml +++ b/_typos.toml @@ -12,3 +12,4 @@ agregate = "agregate" # since that script name is already in ginkgo-data repo [default.extend-identifiers] set_complex_subpsace = "set_complex_subpsace" # remove when deprecated function is gone +HSA_HEADER = "HSA_HEADER" diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index f12dbad7f19..434474fd336 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -33,7 +33,7 @@ function(ginkgo_benchmark_hipsparse_linops type def) # use Thrust C++ device just for compilation, we don't use thrust::complex in the benchmarks target_compile_definitions(hipsparse_linops_${type} PUBLIC -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CPP) target_include_directories(hipsparse_linops_${type} SYSTEM PRIVATE - ${HAS_HEADER} ${HIP_INCLUDE_DIRS} + ${HSA_HEADER} ${HIP_INCLUDE_DIRS} ${HIPBLAS_INCLUDE_DIRS} 
${HIPSPARSE_INCLUDE_DIRS}) target_link_libraries(hipsparse_linops_${type} Ginkgo::ginkgo ${HIPSPARSE_LIBRARIES}) endfunction() @@ -133,7 +133,7 @@ if (GINKGO_BUILD_HIP) add_library(hip_timer utils/hip_timer.hip.cpp) EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) set_target_properties(hip_timer PROPERTIES COMPILE_FLAGS ${HIP_CXX_FLAGS}) - target_include_directories(hip_timer SYSTEM PRIVATE ${HAS_HEADER} ${HIP_INCLUDE_DIRS}) + target_include_directories(hip_timer SYSTEM PRIVATE ${HSA_HEADER} ${HIP_INCLUDE_DIRS}) target_link_libraries(hip_timer ginkgo) endif() From a465b7d82b54b8c4dbb99d6e9b22699e52765f39 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 14 Jun 2023 13:52:08 +0200 Subject: [PATCH 074/583] Address review comments --- .github/workflows/spell_check.yml | 2 ++ CHANGELOG.md | 18 +++++++++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/workflows/spell_check.yml b/.github/workflows/spell_check.yml index 9aaeca57385..0fee0cce1aa 100644 --- a/.github/workflows/spell_check.yml +++ b/.github/workflows/spell_check.yml @@ -9,4 +9,6 @@ jobs: - uses: actions/checkout@v3 - name: Check for typos uses: crate-ci/typos@master + with: + config: ./_typos.toml diff --git a/CHANGELOG.md b/CHANGELOG.md index e5728ef2cc2..834bb6aa061 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -853,15 +853,15 @@ These universities, along with various project grants, supported the development Ginkgo 1.0.0 contains contributions from: -**Hartwig Anzt**, Karlsruhe Institute of Technology -**Yenchen Chen**, National Taiwan University -**Terry Cojean**, Karlsruhe Institute of Technology -**Goran Flegar**, Universitat Jaume I -**Fritz Göbel**, Karlsruhe Institute of Technology -**Thomas Grützmacher**, Karlsruhe Institute of Technology -**Pratik Nayak**, Karlsruhe Institute of Technology -**Tobias Ribizel**, Karlsruhe Institute of Technology -**Yuhsiang Tsai**, National Taiwan University +**Hartwig Anzt**, Karlsruhe Institute of Technology +**Yenchen Chen**, National Taiwan University +**Terry Cojean**, Karlsruhe Institute of Technology +**Goran Flegar**, Universitat Jaume I +**Fritz Göbel**, Karlsruhe Institute of Technology +**Thomas Grützmacher**, Karlsruhe Institute of Technology +**Pratik Nayak**, Karlsruhe Institute of Technology +**Tobias Ribizel**, Karlsruhe Institute of Technology +**Yuhsiang Tsai**, National Taiwan University Supporting materials are provided by the following individuals: From 1751160aae41811efbd90cab80963a683a0c6b37 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 14 Jun 2023 13:57:28 +0200 Subject: [PATCH 075/583] Address review comments Co-authored-by: Yuhsiang Tsai --- include/ginkgo/core/base/polymorphic_object.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/ginkgo/core/base/polymorphic_object.hpp b/include/ginkgo/core/base/polymorphic_object.hpp index fe5fdeae258..fc758f97699 100644 --- a/include/ginkgo/core/base/polymorphic_object.hpp +++ b/include/ginkgo/core/base/polymorphic_object.hpp @@ -60,7 +60,8 @@ namespace gko { * directly, and are thus not virtual. Instead, there are equivalent * protected methods (ending in _impl) that should be * overridden instead. This allows polymorphic objects to implement - * default behavior around virtual methods (parameter checking, type casting). + * default behavior around virtual methods (parameter checking, type + * casting). 
* * @see EnablePolymorphicObject if you wish to implement a concrete polymorphic * object and have sensible defaults generated automatically. From 376184d29a7f008891deaf36a413b1a7c35347a8 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 14 Jun 2023 14:00:37 +0200 Subject: [PATCH 076/583] Address review comments Co-authored-by: Yuhsiang Tsai --- include/ginkgo/core/solver/idr.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ginkgo/core/solver/idr.hpp b/include/ginkgo/core/solver/idr.hpp index 61020eb4f05..fc677f33171 100644 --- a/include/ginkgo/core/solver/idr.hpp +++ b/include/ginkgo/core/solver/idr.hpp @@ -167,7 +167,7 @@ class Idr [[deprecated("Use set_complex_subspace instead")]] void set_complex_subpsace(const bool other) { - set_complex_subspace(other); + this->set_complex_subspace(other); } /** From a3a6b9e5cee39442d586163d5091c34543183a25 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 14 Jun 2023 17:05:06 +0200 Subject: [PATCH 077/583] Update CHANGELOG.md Add linebreaks --- CHANGELOG.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 834bb6aa061..af4d3c06bb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -845,9 +845,9 @@ About Ginkgo 1.0.0 is brought to you by: -**Karlsruhe Institute of Technology**, Germany -**Universitat Jaume I**, Spain -**University of Tennessee, Knoxville**, US +**Karlsruhe Institute of Technology**, Germany +**Universitat Jaume I**, Spain +**University of Tennessee, Knoxville**, US These universities, along with various project grants, supported the development team and provided resources needed for the development of Ginkgo. @@ -865,15 +865,15 @@ Ginkgo 1.0.0 contains contributions from: Supporting materials are provided by the following individuals: -**David Rogers** - the Ginkgo logo -**Frithjof Fleischhammer** - the Ginkgo website +**David Rogers** - the Ginkgo logo +**Frithjof Fleischhammer** - the Ginkgo website The development team is grateful to the following individuals for discussions and comments: -**Erik Boman** -**Jelena Držaić** -**Mike Heroux** -**Mark Hoemmen** -**Timo Heister** -**Jens Saak** +**Erik Boman** +**Jelena Držaić** +**Mike Heroux** +**Mark Hoemmen** +**Timo Heister** +**Jens Saak** From daa4cc6edb89c9dc268f60a6e0a9536484bb424d Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Tue, 20 Jun 2023 20:25:06 +0200 Subject: [PATCH 078/583] move spellchecker config to .github folder --- _typos.toml => .github/_typos.toml | 0 .github/workflows/spell_check.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename _typos.toml => .github/_typos.toml (100%) diff --git a/_typos.toml b/.github/_typos.toml similarity index 100% rename from _typos.toml rename to .github/_typos.toml diff --git a/.github/workflows/spell_check.yml b/.github/workflows/spell_check.yml index 0fee0cce1aa..fa0550858f9 100644 --- a/.github/workflows/spell_check.yml +++ b/.github/workflows/spell_check.yml @@ -10,5 +10,5 @@ jobs: - name: Check for typos uses: crate-ci/typos@master with: - config: ./_typos.toml + config: .github/_typos.toml From 0734b0b1316b9a338b55659387cd9a22f6d61c22 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Tue, 20 Jun 2023 20:26:41 +0200 Subject: [PATCH 079/583] consistent us spelling --- reference/reorder/rcm_kernels.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/reference/reorder/rcm_kernels.cpp b/reference/reorder/rcm_kernels.cpp index 9ad8de1d170..5e357cf775c 100644 --- 
a/reference/reorder/rcm_kernels.cpp +++ b/reference/reorder/rcm_kernels.cpp @@ -111,7 +111,7 @@ std::pair rls_contender_and_height( // The last levels size is required to compute the contender. IndexType last_level_size = 0; - // While there are still nodes whose neighbours haven't been inspected. + // While there are still nodes whose neighbors haven't been inspected. while (rls_index < rls_offset) { auto parent = rls_p[rls_index]; --current_level_countdown; @@ -255,12 +255,12 @@ void get_permutation(std::shared_ptr exec, ++tail_offset; } - // Get the neighbours of the next vertex, + // Get the neighbors of the next vertex, // check if they have already been visited, // if no, insert them to sort. auto prev_head_offset = head_offset; - // Get the next vertex neighbours. + // Get the next vertex neighbors. auto row_start = row_ptrs[next_vertex]; auto row_end = row_ptrs[next_vertex + 1]; for (auto neighbor_idx = row_start; neighbor_idx < row_end; @@ -276,7 +276,7 @@ void get_permutation(std::shared_ptr exec, } } - // Sort all just-added neighbours by degree. + // Sort all just-added neighbors by degree. std::sort( linear_queue_p + prev_head_offset, linear_queue_p + head_offset, [&](IndexType i, IndexType j) { return degrees[i] < degrees[j]; }); From 32f3d54e22077235c22a3c515194865c2d58b5a8 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Tue, 20 Jun 2023 20:41:46 +0200 Subject: [PATCH 080/583] Fix interface break Co-authored-by: Yuhsiang Tsai --- .github/_typos.toml | 2 ++ include/ginkgo/core/base/range.hpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/_typos.toml b/.github/_typos.toml index e261242ffa3..c9643175c07 100644 --- a/.github/_typos.toml +++ b/.github/_typos.toml @@ -13,3 +13,5 @@ agregate = "agregate" # since that script name is already in ginkgo-data repo [default.extend-identifiers] set_complex_subpsace = "set_complex_subpsace" # remove when deprecated function is gone HSA_HEADER = "HSA_HEADER" +one_operaton = "one_operaton" # considered interface break in range.hpp + diff --git a/include/ginkgo/core/base/range.hpp b/include/ginkgo/core/base/range.hpp index 29c7baba8d8..6e1fdb3a007 100644 --- a/include/ginkgo/core/base/range.hpp +++ b/include/ginkgo/core/base/range.hpp @@ -710,7 +710,7 @@ GKO_ENABLE_UNARY_RANGE_OPERATION(bitwise_not, operator~, // common unary functions GKO_ENABLE_UNARY_RANGE_OPERATION(zero_operation, zero, accessor::detail::zero_operation); -GKO_ENABLE_UNARY_RANGE_OPERATION(one_operation, one, +GKO_ENABLE_UNARY_RANGE_OPERATION(one_operaton, one, accessor::detail::one_operation); GKO_ENABLE_UNARY_RANGE_OPERATION(abs_operation, abs, accessor::detail::abs_operation); From fc4a9a79854c377b5cae242ffc738b30c1dacaa8 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 21 Jun 2023 12:53:39 +0200 Subject: [PATCH 081/583] Update .github/workflows/spell_check.yml Co-authored-by: Marcel Koch --- .github/workflows/spell_check.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/spell_check.yml b/.github/workflows/spell_check.yml index fa0550858f9..0049dce9180 100644 --- a/.github/workflows/spell_check.yml +++ b/.github/workflows/spell_check.yml @@ -1,5 +1,7 @@ name: Test GitHub Action -on: [pull_request] +on: + pull_request: + types: [opened, synchronize] jobs: run: From 2827fbe811c7b9a925c5d3a4a2bec3d5314cce60 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 21 Jun 2023 14:20:05 +0200 Subject: [PATCH 082/583] Add deprecation notes Co-authored-by: Marcel Koch --- 
include/ginkgo/core/base/range.hpp | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/include/ginkgo/core/base/range.hpp b/include/ginkgo/core/base/range.hpp index 6e1fdb3a007..4bc9a77267f 100644 --- a/include/ginkgo/core/base/range.hpp +++ b/include/ginkgo/core/base/range.hpp @@ -614,6 +614,18 @@ struct implement_binary_operation \ + struct [[deprecated( \ + "Please use " #_operation_name)]] _operation_deprecated_name \ + : _operation_name {}; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + #define GKO_ENABLE_UNARY_RANGE_OPERATION(_operation_name, _operator_name, \ _operator) \ @@ -708,9 +720,10 @@ GKO_ENABLE_UNARY_RANGE_OPERATION(bitwise_not, operator~, accessor::detail::bitwise_not); // common unary functions + GKO_ENABLE_UNARY_RANGE_OPERATION(zero_operation, zero, accessor::detail::zero_operation); -GKO_ENABLE_UNARY_RANGE_OPERATION(one_operaton, one, +GKO_ENABLE_UNARY_RANGE_OPERATION(one_operation, one, accessor::detail::one_operation); GKO_ENABLE_UNARY_RANGE_OPERATION(abs_operation, abs, accessor::detail::abs_operation); @@ -723,6 +736,14 @@ GKO_ENABLE_UNARY_RANGE_OPERATION(conj_operation, conj, GKO_ENABLE_UNARY_RANGE_OPERATION(squared_norm_operation, squared_norm, accessor::detail::squared_norm_operation); +GKO_DEPRECATED_UNARY_RANGE_OPERATION(one_operaton, one_operation); +GKO_DEPRECATED_UNARY_RANGE_OPERATION(abs_operaton, abs_operation); +GKO_DEPRECATED_UNARY_RANGE_OPERATION(real_operaton, real_operation); +GKO_DEPRECATED_UNARY_RANGE_OPERATION(imag_operaton, imag_operation); +GKO_DEPRECATED_UNARY_RANGE_OPERATION(conj_operaton, conj_operation); +GKO_DEPRECATED_UNARY_RANGE_OPERATION(squared_norm_operaton, + squared_norm_operation); + namespace accessor { @@ -766,6 +787,7 @@ struct transpose_operation { GKO_BIND_UNARY_RANGE_OPERATION_TO_OPERATOR(transpose_operation, transpose); +#undef GKO_DEPRECATED_UNARY_RANGE_OPERATION #undef GKO_DEFINE_SIMPLE_UNARY_OPERATION #undef GKO_ENABLE_UNARY_RANGE_OPERATION @@ -841,6 +863,9 @@ GKO_BIND_UNARY_RANGE_OPERATION_TO_OPERATOR(transpose_operation, transpose); "semi-colon warnings") +#define GKO_DEPRECATED_SIMPLE_BINARY_OPERATION(_deprecated_name, _name) \ + struct [[deprecated("Please use " #_name)]] _deprecated_name : _name {}; + #define GKO_DEFINE_SIMPLE_BINARY_OPERATION(_name, ...) 
\ struct _name { \ private: \ @@ -919,6 +944,8 @@ GKO_DEFINE_SIMPLE_BINARY_OPERATION(right_shift, first >> second); GKO_DEFINE_SIMPLE_BINARY_OPERATION(max_operation, max(first, second)); GKO_DEFINE_SIMPLE_BINARY_OPERATION(min_operation, min(first, second)); +GKO_DEPRECATED_SIMPLE_BINARY_OPERATION(max_operaton, max_operation); +GKO_DEPRECATED_SIMPLE_BINARY_OPERATION(min_operaton, min_operation); } // namespace detail } // namespace accessor From 8c96828a4e4618213befb359e2a5ee332252ed16 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Mon, 24 Jul 2023 04:33:48 +0000 Subject: [PATCH 083/583] Format files Co-authored-by: Gregor Olenik --- include/ginkgo/core/base/range.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ginkgo/core/base/range.hpp b/include/ginkgo/core/base/range.hpp index 4bc9a77267f..5ba07aa834f 100644 --- a/include/ginkgo/core/base/range.hpp +++ b/include/ginkgo/core/base/range.hpp @@ -620,7 +620,7 @@ struct implement_binary_operation \ struct [[deprecated( \ "Please use " #_operation_name)]] _operation_deprecated_name \ - : _operation_name {}; \ + : _operation_name{}; \ } \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -864,7 +864,7 @@ GKO_BIND_UNARY_RANGE_OPERATION_TO_OPERATOR(transpose_operation, transpose); #define GKO_DEPRECATED_SIMPLE_BINARY_OPERATION(_deprecated_name, _name) \ - struct [[deprecated("Please use " #_name)]] _deprecated_name : _name {}; + struct [[deprecated("Please use " #_name)]] _deprecated_name : _name{}; #define GKO_DEFINE_SIMPLE_BINARY_OPERATION(_name, ...) \ struct _name { \ From 7708236f1bb5b8571521921ffff5b5ac8da21537 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 21 Jun 2023 15:15:15 +0200 Subject: [PATCH 084/583] add operaton exception, reformat --- .github/_typos.toml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/_typos.toml b/.github/_typos.toml index c9643175c07..4b9d9be6403 100644 --- a/.github/_typos.toml +++ b/.github/_typos.toml @@ -13,5 +13,11 @@ agregate = "agregate" # since that script name is already in ginkgo-data repo [default.extend-identifiers] set_complex_subpsace = "set_complex_subpsace" # remove when deprecated function is gone HSA_HEADER = "HSA_HEADER" +conj_operaton = "conj_operaton" # considered interface break in range.hpp +imag_operaton = "imag_operaton" # considered interface break in range.hpp +real_operaton = "real_operaton" # considered interface break in range.hpp one_operaton = "one_operaton" # considered interface break in range.hpp - +abs_operaton = "abs_operaton" # considered interface break in range.hpp +max_operaton = "max_operaton" # considered interface break in range.hpp +min_operaton = "min_operaton" # considered interface break in range.hpp +squared_norm_operaton = "squared_norm_operaton" # considered interface break in range.hpp From 905658eb953473a1822834c040d589e71a1a77aa Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 5 Apr 2023 11:09:18 +0200 Subject: [PATCH 085/583] clean up cholesky reference tests --- .../test/factorization/cholesky_kernels.cpp | 660 ++++++++---------- 1 file changed, 279 insertions(+), 381 deletions(-) diff --git a/reference/test/factorization/cholesky_kernels.cpp b/reference/test/factorization/cholesky_kernels.cpp index d5fae12a2e9..36bbd7e176e 100644 --- a/reference/test/factorization/cholesky_kernels.cpp +++ b/reference/test/factorization/cholesky_kernels.cpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include @@ -67,12 +68,15 @@ class Cholesky : public ::testing::Test { using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; using matrix_type = gko::matrix::Csr; + using sparsity_matrix_type = + gko::matrix::SparsityCsr; using elimination_forest = gko::factorization::elimination_forest; Cholesky() : ref(gko::ReferenceExecutor::create()), tmp{ref}, + ref_row_nnz{ref}, storage_offsets{ref}, storage{ref}, row_descs{ref} @@ -98,13 +102,27 @@ class Cholesky : public ::testing::Test { return result; } + void setup( + std::initializer_list> mtx_list, + std::initializer_list> factor_list) + { + mtx = gko::initialize(mtx_list, ref); + l_factor_ref = gko::initialize(factor_list, ref); + setup_impl(); + } + void setup(const char* name_mtx, const char* name_factor) { std::ifstream stream{name_mtx}; std::ifstream ref_stream{name_factor}; mtx = gko::read(stream, this->ref); - num_rows = mtx->get_size()[0]; l_factor_ref = gko::read(ref_stream, this->ref); + setup_impl(); + } + + void setup_impl() + { + num_rows = mtx->get_size()[0]; combined_ref = combined_factor(l_factor_ref.get()); l_factor = matrix_type::create(ref, l_factor_ref->get_size(), l_factor_ref->get_num_stored_elements()); @@ -123,6 +141,13 @@ class Cholesky : public ::testing::Test { storage_offsets.resize_and_reset(num_rows + 1); row_descs.resize_and_reset(num_rows); + ref_row_nnz.resize_and_reset(num_rows); + const auto ref_row_ptrs = l_factor_ref->get_const_row_ptrs(); + for (gko::size_type row = 0; row < num_rows; row++) { + ref_row_nnz.get_data()[row] = + ref_row_ptrs[row + 1] - ref_row_ptrs[row]; + } + const auto allowed = gko::matrix::csr::sparsity_type::bitmap | gko::matrix::csr::sparsity_type::full | gko::matrix::csr::sparsity_type::hash; @@ -149,7 +174,7 @@ class Cholesky : public ::testing::Test { } } - void forall_matrices(std::function fn) + void forall_matrices(std::function fn, bool non_spd) { { SCOPED_TRACE("ani1"); @@ -163,11 +188,87 @@ class Cholesky : public ::testing::Test { gko::matrices::location_ani1_amd_chol_mtx); fn(); } + { + SCOPED_TRACE("example"); + this->setup( + {{4, 0, 1, 0, 0, 0, 0, 1, 0, 0}, + {0, 4, 0, 0, 1, 0, 0, 0, 0, 1}, + {1, 0, 4.25, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 4, 0, 0, 0, 0, 1, 1}, + {0, 1, 0, 0, 4.25, 0, 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 4, 2, 4, 0, 0}, + {0, 0, 1, 0, 0, 2, 5.25, 0, 0, 0}, + {1, 0, 0, 0, 0, 4, 0, 8, 1, 1}, + {0, 0, 0, 1, 1, 0, 0, 1, 4, 0}, + {0, 1, 0, 1, 1, 0, 0, 1, 0, 4}}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 0, 0, 0, 0, 0, 0, 0, 0}, + {0.5, 0, 2, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 2, 0, 0, 0, 0, 0, 0}, + {0, 0.5, 0, 0, 2, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 2, 0, 0, 0, 0}, + {0, 0, 0.5, 0, 0, 1, 2, 0, 0, 0}, + {0.5, 0, -0.125, 0, 0, 2, -0.96875, 1.67209402770897, 0, 0}, + {0, 0, 0, 0.5, 0.5, 0, 0, 0.598052491922453, 1.7726627476498, + 0}, + {0, 0.5, 0, 0.5, 0.375, 0, 0, 0.598052491922453, + -0.448571948696326, 1.67346688755653}}); + fn(); + } + { + SCOPED_TRACE("separable"); + this->setup({{4, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + {0, 4, 2, 0, 0, 0, 0, 0, 0, 0}, + {1, 2, 5.25, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 4, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 4.25, 1, 0, 0, 0, 4}, + {0, 0, 0, 0, 1, 4.25, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 4, 1, 0, 4}, + {0, 0, 0, 0, 0, 0, 1, 4.25, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 4, 1}, + {0, 0, 0, 0, 4, 0, 4, 0, 1, 17.75}}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 0, 0, 0, 0, 0, 0, 0, 0}, + {0.5, 1, 2, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 2, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0.5, 2, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 
0.5, 2, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 2, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0.5, 2, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 2, 0}, + {0, 0, 0, 0, 2, -0.5, 2, -0.5, 0.5, 3}}); + fn(); + } + if (non_spd) { + SCOPED_TRACE("missing diagonal"); + this->setup({{1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 1, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 1, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 1, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, + {{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, + {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, + {1., 1., 1., 0., 0., 0., 0., 0., 0., 0.}, + {0., 0., 1., 1., 0., 0., 0., 0., 0., 0.}, + {0., 0., 0., 1., 1., 0., 0., 0., 0., 0.}, + {0., 0., 0., 0., 1., 1., 0., 0., 0., 0.}, + {0., 0., 0., 0., 0., 1., 1., 0., 0., 0.}, + {0., 0., 0., 0., 0., 0., 1., 1., 0., 0.}, + {0., 0., 0., 0., 0., 0., 0., 0., 1., 0.}, + {0., 0., 0., 0., 0., 0., 1., 1., 1., 1.}}); + fn(); + } } std::shared_ptr ref; gko::size_type num_rows; gko::array tmp; + gko::array ref_row_nnz; gko::array storage_offsets; gko::array storage; gko::array row_descs; @@ -183,255 +284,51 @@ TYPED_TEST_SUITE(Cholesky, gko::test::ValueIndexTypes, PairTypenameNameGenerator); -TYPED_TEST(Cholesky, KernelSymbolicCountExample) -{ - using matrix_type = typename TestFixture::matrix_type; - using elimination_forest = typename TestFixture::elimination_forest; - using index_type = typename TestFixture::index_type; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 1, 0, 0}, - {0, 1, 0, 1, 0, 0, 0, 0, 0, 1}, - {1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0, 1, 1}, - {0, 1, 0, 0, 1, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 1, 0, 1, 0, 0}, - {0, 0, 1, 0, 0, 1, 1, 0, 0, 0}, - {1, 0, 0, 0, 0, 1, 0, 1, 1, 1}, - {0, 0, 0, 1, 1, 0, 0, 1, 1, 0}, - {0, 1, 0, 1, 1, 0, 0, 1, 0, 1}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - gko::array row_nnz{this->ref, 10}; - - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); - - GKO_ASSERT_ARRAY_EQ(row_nnz, I({1, 1, 2, 1, 2, 1, 3, 5, 4, 6})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicFactorizeExample) -{ - using matrix_type = typename TestFixture::matrix_type; - using elimination_forest = typename TestFixture::elimination_forest; - using index_type = typename TestFixture::index_type; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 1, 0, 0}, - {0, 1, 0, 1, 0, 0, 0, 0, 0, 1}, - {1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0, 1, 1}, - {0, 1, 0, 0, 1, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 1, 0, 1, 0, 0}, - {0, 0, 1, 0, 0, 1, 1, 0, 0, 0}, - {1, 0, 0, 0, 0, 1, 0, 1, 1, 1}, - {0, 0, 0, 1, 1, 0, 0, 1, 1, 0}, - {0, 1, 0, 1, 1, 0, 0, 1, 0, 1}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - auto l_factor = matrix_type::create(this->ref, gko::dim<2>{10, 10}, 26); - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp); - gko::kernels::reference::components::prefix_sum_nonnegative( - this->ref, l_factor->get_row_ptrs(), 11); - - gko::kernels::reference::cholesky::symbolic_factorize( - this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); - - GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, - l({{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, - {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, - {1., 0., 1., 0., 0., 0., 0., 
0., 0., 0.}, - {0., 0., 0., 1., 0., 0., 0., 0., 0., 0.}, - {0., 1., 0., 0., 1., 0., 0., 0., 0., 0.}, - {0., 0., 0., 0., 0., 1., 0., 0., 0., 0.}, - {0., 0., 1., 0., 0., 1., 1., 0., 0., 0.}, - {1., 0., 1., 0., 0., 1., 1., 1., 0., 0.}, - {0., 0., 0., 1., 1., 0., 0., 1., 1., 0.}, - {0., 1., 0., 1., 1., 0., 0., 1., 1., 1.}})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicCountSeparable) -{ - using matrix_type = typename TestFixture::matrix_type; - using elimination_forest = typename TestFixture::elimination_forest; - using index_type = typename TestFixture::index_type; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 1}, - {0, 0, 0, 0, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 1}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 1, 0, 1, 0, 1, 1}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - gko::array row_nnz{this->ref, 10}; - - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); - - GKO_ASSERT_ARRAY_EQ(row_nnz, I({1, 1, 3, 1, 2, 2, 1, 2, 1, 6})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicFactorizeSeparable) +TYPED_TEST(Cholesky, KernelSymbolicCount) { using matrix_type = typename TestFixture::matrix_type; - using index_type = typename TestFixture::index_type; + using sparsity_matrix_type = typename TestFixture::sparsity_matrix_type; using elimination_forest = typename TestFixture::elimination_forest; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 1}, - {0, 0, 0, 0, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 1}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 1, 0, 1, 0, 1, 1}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - auto l_factor = matrix_type::create(this->ref, gko::dim<2>{10, 10}, 26); - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp); - gko::kernels::reference::components::prefix_sum_nonnegative( - this->ref, l_factor->get_row_ptrs(), 11); - - gko::kernels::reference::cholesky::symbolic_factorize( - this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); - - GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, - l({{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, - {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, - {1., 1., 1., 0., 0., 0., 0., 0., 0., 0.}, - {0., 0., 0., 1., 0., 0., 0., 0., 0., 0.}, - {0., 0., 0., 1., 1., 0., 0., 0., 0., 0.}, - {0., 0., 0., 0., 1., 1., 0., 0., 0., 0.}, - {0., 0., 0., 0., 0., 0., 1., 0., 0., 0.}, - {0., 0., 0., 0., 0., 0., 1., 1., 0., 0.}, - {0., 0., 0., 0., 0., 0., 0., 0., 1., 0.}, - {0., 0., 0., 0., 1., 1., 1., 1., 1., 1.}})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicCountMissingDiagonal) -{ - using matrix_type = typename TestFixture::matrix_type; using index_type = typename TestFixture::index_type; - using elimination_forest = typename TestFixture::elimination_forest; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 1, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 1, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 1, 1, 0, 1}, - {0, 0, 0, 0, 0, 
0, 1, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - gko::array row_nnz{this->ref, 10}; - - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); - - GKO_ASSERT_ARRAY_EQ(row_nnz, I({1, 1, 3, 2, 2, 2, 2, 2, 1, 4})); + this->forall_matrices( + [this] { + gko::factorization::compute_elim_forest(this->mtx.get(), + this->forest); + gko::array row_nnz{this->ref, this->num_rows}; + + gko::kernels::reference::cholesky::symbolic_count( + this->ref, this->mtx.get(), *this->forest, row_nnz.get_data(), + this->tmp); + + GKO_ASSERT_ARRAY_EQ(row_nnz, this->ref_row_nnz); + }, + true); } -TYPED_TEST(Cholesky, KernelSymbolicFactorizeMissingDiagonal) +TYPED_TEST(Cholesky, KernelSymbolicFactorize) { using matrix_type = typename TestFixture::matrix_type; - using index_type = typename TestFixture::index_type; + using sparsity_matrix_type = typename TestFixture::sparsity_matrix_type; using elimination_forest = typename TestFixture::elimination_forest; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 1, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 1, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 1, 1, 0, 1}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - auto l_factor = matrix_type::create(this->ref, gko::dim<2>{10, 10}, 20); - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp); - gko::kernels::reference::components::prefix_sum_nonnegative( - this->ref, l_factor->get_row_ptrs(), 11); - - gko::kernels::reference::cholesky::symbolic_factorize( - this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); - - GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, - l({{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, - {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, - {1., 1., 1., 0., 0., 0., 0., 0., 0., 0.}, - {0., 0., 1., 1., 0., 0., 0., 0., 0., 0.}, - {0., 0., 0., 1., 1., 0., 0., 0., 0., 0.}, - {0., 0., 0., 0., 1., 1., 0., 0., 0., 0.}, - {0., 0., 0., 0., 0., 1., 1., 0., 0., 0.}, - {0., 0., 0., 0., 0., 0., 1., 1., 0., 0.}, - {0., 0., 0., 0., 0., 0., 0., 0., 1., 0.}, - {0., 0., 0., 0., 0., 0., 1., 1., 1., 1.}})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicCountAni1) -{ using index_type = typename TestFixture::index_type; - using elimination_forest = typename TestFixture::elimination_forest; - this->setup(gko::matrices::location_ani1_mtx, - gko::matrices::location_ani1_chol_mtx); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(this->mtx.get(), forest); - gko::array row_nnz{this->ref, this->mtx->get_size()[0]}; - - gko::kernels::reference::cholesky::symbolic_count( - this->ref, this->mtx.get(), *forest, row_nnz.get_data(), this->tmp); - - GKO_ASSERT_ARRAY_EQ( - row_nnz, I({1, 2, 3, 3, 2, 2, 7, 7, 7, 8, 8, 7, - 8, 8, 8, 8, 2, 10, 10, 10, 10, 9, 8, 8, - 8, 7, 8, 2, 8, 8, 7, 5, 8, 6, 4, 4})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicFactorize) -{ - using elimination_forest = typename TestFixture::elimination_forest; - this->forall_matrices([this] { - std::unique_ptr forest; - gko::factorization::compute_elim_forest(this->mtx.get(), forest); - gko::kernels::reference::cholesky::symbolic_count( - 
this->ref, this->mtx.get(), *forest, this->l_factor->get_row_ptrs(), - this->tmp); - gko::kernels::reference::components::prefix_sum_nonnegative( - this->ref, this->l_factor->get_row_ptrs(), - this->mtx->get_size()[0] + 1); - - gko::kernels::reference::cholesky::symbolic_factorize( - this->ref, this->mtx.get(), *forest, this->l_factor.get(), - this->tmp); - - GKO_ASSERT_MTX_EQ_SPARSITY(this->l_factor, this->l_factor_ref); - }); + this->forall_matrices( + [this] { + gko::factorization::compute_elim_forest(this->mtx.get(), + this->forest); + gko::kernels::reference::cholesky::symbolic_count( + this->ref, this->mtx.get(), *this->forest, + this->l_factor->get_row_ptrs(), this->tmp); + gko::kernels::reference::components::prefix_sum_nonnegative( + this->ref, this->l_factor->get_row_ptrs(), this->num_rows + 1); + + gko::kernels::reference::cholesky::symbolic_factorize( + this->ref, this->mtx.get(), *this->forest, this->l_factor.get(), + this->tmp); + + GKO_ASSERT_MTX_EQ_SPARSITY(this->l_factor, this->l_factor_ref); + }, + true); } @@ -439,14 +336,16 @@ TYPED_TEST(Cholesky, SymbolicFactorize) { using matrix_type = typename TestFixture::matrix_type; using elimination_forest = typename TestFixture::elimination_forest; - this->forall_matrices([this] { - std::unique_ptr combined_factor; - std::unique_ptr forest; - gko::factorization::symbolic_cholesky(this->mtx.get(), true, - combined_factor, forest); - - GKO_ASSERT_MTX_EQ_SPARSITY(combined_factor, this->combined_ref); - }); + this->forall_matrices( + [this] { + std::unique_ptr combined_factor; + std::unique_ptr forest; + gko::factorization::symbolic_cholesky(this->mtx.get(), true, + combined_factor, forest); + + GKO_ASSERT_MTX_EQ_SPARSITY(combined_factor, this->combined_ref); + }, + true); } @@ -454,55 +353,39 @@ TYPED_TEST(Cholesky, SymbolicFactorizeOnlyLower) { using matrix_type = typename TestFixture::matrix_type; using elimination_forest = typename TestFixture::elimination_forest; - this->forall_matrices([this] { - std::unique_ptr l_factor; - std::unique_ptr forest; - gko::factorization::symbolic_cholesky(this->mtx.get(), false, l_factor, - forest); - - GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, this->l_factor_ref); - }); + this->forall_matrices( + [this] { + std::unique_ptr l_factor; + std::unique_ptr forest; + gko::factorization::symbolic_cholesky(this->mtx.get(), false, + l_factor, forest); + + GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, this->l_factor_ref); + }, + true); } -TYPED_TEST(Cholesky, KernelSymbolicCountAni1Amd) -{ - using index_type = typename TestFixture::index_type; - using elimination_forest = typename TestFixture::elimination_forest; - this->setup(gko::matrices::location_ani1_amd_mtx, - gko::matrices::location_ani1_amd_chol_mtx); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(this->mtx.get(), forest); - gko::array row_nnz{this->ref, this->mtx->get_size()[0]}; - - gko::kernels::reference::cholesky::symbolic_count( - this->ref, this->mtx.get(), *forest, row_nnz.get_data(), this->tmp); - - GKO_ASSERT_ARRAY_EQ( - row_nnz, I({1, 1, 2, 3, 5, 4, 1, 2, 3, 4, 1, 2, - 2, 2, 5, 1, 4, 4, 4, 1, 2, 3, 4, 3, - 8, 10, 4, 8, 10, 7, 7, 13, 21, 6, 11, 14})); -} - - -TYPED_TEST(Cholesky, KernelForestFromFactor) +TYPED_TEST(Cholesky, KernelForestFromFactorPlusPostprocessing) { using matrix_type = typename TestFixture::matrix_type; using index_type = typename TestFixture::index_type; using elimination_forest = typename TestFixture::elimination_forest; - this->forall_matrices([this] { - std::unique_ptr combined_factor; - std::unique_ptr 
forest_ref; - gko::factorization::symbolic_cholesky(this->mtx.get(), true, - combined_factor, forest_ref); - elimination_forest forest{this->ref, - static_cast(this->num_rows)}; - - gko::kernels::reference::cholesky::forest_from_factor( - this->ref, combined_factor.get(), forest); - - this->assert_equal_forests(forest, *forest_ref); - }); + this->forall_matrices( + [this] { + std::unique_ptr combined_factor; + std::unique_ptr forest_ref; + gko::factorization::symbolic_cholesky(this->mtx.get(), true, + combined_factor, forest_ref); + elimination_forest forest{this->ref, + static_cast(this->num_rows)}; + + gko::kernels::reference::cholesky::forest_from_factor( + this->ref, combined_factor.get(), forest); + + this->assert_equal_forests(forest, *forest_ref); + }, + true); } @@ -510,39 +393,46 @@ TYPED_TEST(Cholesky, KernelInitializeWorks) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - this->forall_matrices([this] { - std::fill_n(this->combined->get_values(), - this->combined->get_num_stored_elements(), - gko::zero()); - gko::array diag_idxs{this->ref, this->num_rows}; - gko::array transpose_idxs{ - this->ref, this->combined->get_num_stored_elements()}; - - gko::kernels::reference::cholesky::initialize( - this->ref, this->mtx.get(), this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_data(), transpose_idxs.get_data(), - this->combined.get()); - - GKO_ASSERT_MTX_NEAR(this->mtx, this->combined, 0.0); - for (gko::size_type row = 0; row < this->num_rows; row++) { - const auto diag_pos = diag_idxs.get_const_data()[row]; - const auto begin_pos = this->combined->get_const_row_ptrs()[row]; - const auto end_pos = this->combined->get_const_row_ptrs()[row + 1]; - ASSERT_GE(diag_pos, begin_pos); - ASSERT_LT(diag_pos, end_pos); - ASSERT_EQ(this->combined->get_const_col_idxs()[diag_pos], row); - for (auto nz = begin_pos; nz < end_pos; nz++) { - const auto trans_pos = transpose_idxs.get_const_data()[nz]; - const auto col = this->combined->get_const_col_idxs()[nz]; - ASSERT_GE(trans_pos, this->combined->get_const_row_ptrs()[col]); - ASSERT_LT(trans_pos, - this->combined->get_const_row_ptrs()[col + 1]); - ASSERT_EQ(this->combined->get_const_col_idxs()[trans_pos], row); - ASSERT_EQ(transpose_idxs.get_const_data()[trans_pos], nz); + this->forall_matrices( + [this] { + std::fill_n(this->combined->get_values(), + this->combined->get_num_stored_elements(), + gko::zero()); + gko::array diag_idxs{this->ref, this->num_rows}; + gko::array transpose_idxs{ + this->ref, this->combined->get_num_stored_elements()}; + + gko::kernels::reference::cholesky::initialize( + this->ref, this->mtx.get(), + this->storage_offsets.get_const_data(), + this->row_descs.get_const_data(), + this->storage.get_const_data(), diag_idxs.get_data(), + transpose_idxs.get_data(), this->combined.get()); + + GKO_ASSERT_MTX_NEAR(this->mtx, this->combined, 0.0); + for (gko::size_type row = 0; row < this->num_rows; row++) { + const auto diag_pos = diag_idxs.get_const_data()[row]; + const auto begin_pos = + this->combined->get_const_row_ptrs()[row]; + const auto end_pos = + this->combined->get_const_row_ptrs()[row + 1]; + ASSERT_GE(diag_pos, begin_pos); + ASSERT_LT(diag_pos, end_pos); + ASSERT_EQ(this->combined->get_const_col_idxs()[diag_pos], row); + for (auto nz = begin_pos; nz < end_pos; nz++) { + const auto trans_pos = transpose_idxs.get_const_data()[nz]; + const auto col = this->combined->get_const_col_idxs()[nz]; + 
ASSERT_GE(trans_pos, + this->combined->get_const_row_ptrs()[col]); + ASSERT_LT(trans_pos, + this->combined->get_const_row_ptrs()[col + 1]); + ASSERT_EQ(this->combined->get_const_col_idxs()[trans_pos], + row); + ASSERT_EQ(transpose_idxs.get_const_data()[trans_pos], nz); + } } - } - }); + }, + true); } @@ -550,26 +440,30 @@ TYPED_TEST(Cholesky, KernelFactorizeWorks) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - this->forall_matrices([this] { - gko::array diag_idxs{this->ref, this->num_rows}; - gko::array transpose_idxs{ - this->ref, this->combined->get_num_stored_elements()}; - gko::array tmp{this->ref}; - gko::kernels::reference::cholesky::initialize( - this->ref, this->mtx.get(), this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_data(), transpose_idxs.get_data(), - this->combined.get()); - - gko::kernels::reference::cholesky::factorize( - this->ref, this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_data(), transpose_idxs.get_data(), *this->forest, - this->combined.get(), tmp); - - GKO_ASSERT_MTX_NEAR(this->combined, this->combined_ref, - r::value); - }); + this->forall_matrices( + [this] { + gko::array diag_idxs{this->ref, this->num_rows}; + gko::array transpose_idxs{ + this->ref, this->combined->get_num_stored_elements()}; + gko::array tmp{this->ref}; + gko::kernels::reference::cholesky::initialize( + this->ref, this->mtx.get(), + this->storage_offsets.get_const_data(), + this->row_descs.get_const_data(), + this->storage.get_const_data(), diag_idxs.get_data(), + transpose_idxs.get_data(), this->combined.get()); + + gko::kernels::reference::cholesky::factorize( + this->ref, this->storage_offsets.get_const_data(), + this->row_descs.get_const_data(), + this->storage.get_const_data(), diag_idxs.get_data(), + transpose_idxs.get_data(), *this->forest, this->combined.get(), + tmp); + + GKO_ASSERT_MTX_NEAR(this->combined, this->combined_ref, + r::value); + }, + false); } @@ -577,23 +471,25 @@ TYPED_TEST(Cholesky, FactorizeWorks) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - this->forall_matrices([this] { - auto factory = - gko::experimental::factorization::Cholesky::build() - .on(this->ref); - - auto cholesky = factory->generate(this->mtx); - - GKO_ASSERT_MTX_NEAR(cholesky->get_combined(), this->combined_ref, - r::value); - ASSERT_EQ(cholesky->get_storage_type(), - gko::experimental::factorization::storage_type:: - symm_combined_cholesky); - ASSERT_EQ(cholesky->get_lower_factor(), nullptr); - ASSERT_EQ(cholesky->get_upper_factor(), nullptr); - ASSERT_EQ(cholesky->get_diagonal(), nullptr); - }); + this->forall_matrices( + [this] { + auto factory = + gko::experimental::factorization::Cholesky::build() + .on(this->ref); + + auto cholesky = factory->generate(this->mtx); + + GKO_ASSERT_MTX_NEAR(cholesky->get_combined(), this->combined_ref, + r::value); + ASSERT_EQ(cholesky->get_storage_type(), + gko::experimental::factorization::storage_type:: + symm_combined_cholesky); + ASSERT_EQ(cholesky->get_lower_factor(), nullptr); + ASSERT_EQ(cholesky->get_upper_factor(), nullptr); + ASSERT_EQ(cholesky->get_diagonal(), nullptr); + }, + false); } @@ -601,28 +497,30 @@ TYPED_TEST(Cholesky, FactorizeWithKnownSparsityWorks) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - 
this->forall_matrices([this] { - auto pattern = - gko::share(gko::matrix::SparsityCsr::create( - this->ref)); - pattern->copy_from(this->combined_ref.get()); - auto factory = - gko::experimental::factorization::Cholesky::build() - .with_symbolic_factorization(pattern) - .on(this->ref); - - auto cholesky = factory->generate(this->mtx); - - GKO_ASSERT_MTX_NEAR(cholesky->get_combined(), this->combined_ref, - r::value); - ASSERT_EQ(cholesky->get_storage_type(), - gko::experimental::factorization::storage_type:: - symm_combined_cholesky); - ASSERT_EQ(cholesky->get_lower_factor(), nullptr); - ASSERT_EQ(cholesky->get_upper_factor(), nullptr); - ASSERT_EQ(cholesky->get_diagonal(), nullptr); - }); + this->forall_matrices( + [this] { + auto pattern = gko::share( + gko::matrix::SparsityCsr::create( + this->ref)); + pattern->copy_from(this->combined_ref.get()); + auto factory = + gko::experimental::factorization::Cholesky::build() + .with_symbolic_factorization(pattern) + .on(this->ref); + + auto cholesky = factory->generate(this->mtx); + + GKO_ASSERT_MTX_NEAR(cholesky->get_combined(), this->combined_ref, + r::value); + ASSERT_EQ(cholesky->get_storage_type(), + gko::experimental::factorization::storage_type:: + symm_combined_cholesky); + ASSERT_EQ(cholesky->get_lower_factor(), nullptr); + ASSERT_EQ(cholesky->get_upper_factor(), nullptr); + ASSERT_EQ(cholesky->get_diagonal(), nullptr); + }, + false); } From 93a3ef84a20bd212436050d641a51ef0fb722196 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 6 Jun 2023 15:02:49 +0200 Subject: [PATCH 086/583] use column Cholesky for GPU --- .../factorization/cholesky_kernels.hpp.inc | 47 +++++++------------ 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/common/cuda_hip/factorization/cholesky_kernels.hpp.inc b/common/cuda_hip/factorization/cholesky_kernels.hpp.inc index f87969a7ad0..eb90127a8ca 100644 --- a/common/cuda_hip/factorization/cholesky_kernels.hpp.inc +++ b/common/cuda_hip/factorization/cholesky_kernels.hpp.inc @@ -149,8 +149,6 @@ __global__ __launch_bounds__(default_block_size) void symbolic_factorize( template __global__ __launch_bounds__(default_block_size) void factorize( const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ cols, - const IndexType* __restrict__ elim_tree_child_ptrs, - const IndexType* __restrict__ elim_tree_children, const IndexType* __restrict__ storage_offsets, const int32* __restrict__ storage, const int64* __restrict__ row_descs, const IndexType* __restrict__ diag_idxs, @@ -171,32 +169,21 @@ __global__ __launch_bounds__(default_block_size) void factorize( const auto row_begin = row_ptrs[row]; const auto row_diag = diag_idxs[row]; const auto row_end = row_ptrs[row + 1]; - const auto child_begin = elim_tree_child_ptrs[row]; - const auto child_end = elim_tree_child_ptrs[row + 1]; gko::matrix::csr::device_sparsity_lookup lookup{ row_ptrs, cols, storage_offsets, storage, row_descs, static_cast(row)}; - for (auto child = child_begin; child < child_end; child++) { - const auto dep = elim_tree_children[child]; - scheduler.wait(dep); - // TODO evaluate parallel waiting with __all_sync - } - // for each lower triangular entry: eliminate with corresponding row + // for each lower triangular entry: eliminate with corresponding column for (auto lower_nz = row_begin; lower_nz < row_diag; lower_nz++) { const auto dep = cols[lower_nz]; - auto val = vals[lower_nz]; + scheduler.wait(dep); + const auto scale = vals[lower_nz]; const auto diag_idx = diag_idxs[dep]; const auto dep_end = row_ptrs[dep + 1]; - const 
auto diag = vals[diag_idx]; - const auto scale = val / diag; - if (lane == 0) { - vals[lower_nz] = scale; - } - // subtract all entries past the diagonal - for (auto upper_nz = diag_idx + 1 + lane; upper_nz < dep_end; + // subtract column dep from current column + for (auto upper_nz = diag_idx + lane; upper_nz < dep_end; upper_nz += config::warp_size) { const auto upper_col = cols[upper_nz]; - if (upper_col < row) { + if (upper_col >= row) { const auto upper_val = vals[upper_nz]; const auto output_pos = lookup.lookup_unsafe(upper_col) + row_begin; @@ -204,17 +191,16 @@ __global__ __launch_bounds__(default_block_size) void factorize( } } } - ValueType sum{}; - for (auto lower_nz = row_begin + lane; lower_nz < row_diag; - lower_nz += config::warp_size) { - sum += squared_norm(vals[lower_nz]); - // copy the lower triangular entries to the transpose - vals[transpose_idxs[lower_nz]] = conj(vals[lower_nz]); + auto diag_val = sqrt(vals[row_diag]); + for (auto upper_nz = row_diag + 1 + lane; upper_nz < row_end; + upper_nz += config::warp_size) { + vals[upper_nz] /= diag_val; + // copy the upper triangular entries to the transpose + vals[transpose_idxs[upper_nz]] = conj(vals[upper_nz]); } - sum = reduce(warp, sum, thrust::plus{}); if (lane == 0) { // store computed diagonal - vals[row_diag] = sqrt(vals[row_diag] - sum); + vals[row_diag] = diag_val; } scheduler.mark_ready(); } @@ -365,10 +351,9 @@ void factorize(std::shared_ptr exec, kernel::factorize<<get_stream()>>>( factors->get_const_row_ptrs(), factors->get_const_col_idxs(), - forest.child_ptrs.get_const_data(), - forest.children.get_const_data(), lookup_offsets, lookup_storage, - lookup_descs, diag_idxs, transpose_idxs, - as_device_type(factors->get_values()), storage, num_rows); + lookup_offsets, lookup_storage, lookup_descs, diag_idxs, + transpose_idxs, as_device_type(factors->get_values()), storage, + num_rows); } } From 4d28ade0809b89d7ae7fa8a99d2de1d91485f1f4 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 21 Jul 2023 17:59:18 +0200 Subject: [PATCH 087/583] adds check that downstream compiler match the ginkgo compiler --- cmake/GinkgoConfig.cmake.in | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 44aaf34fc3f..5194d76b5af 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -63,7 +63,6 @@ set(GINKGO_JACOBI_FULL_OPTIMIZATIONS @GINKGO_JACOBI_FULL_OPTIMIZATIONS@) set(GINKGO_CUDA_ARCHITECTURES "@GINKGO_CUDA_ARCHITECTURES@") set(GINKGO_CUDA_DEFAULT_HOST_COMPILER @GINKGO_CUDA_DEFAULT_HOST_COMPILER@) -set(GINKGO_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@") set(GINKGO_CUDA_ARCH_FLAGS "@GINKGO_CUDA_ARCH_FLAGS@") set(GINKGO_HIP_COMPILER_FLAGS "@GINKGO_HIP_COMPILER_FLAGS@") @@ -91,6 +90,15 @@ set(GINKGO_HAVE_HWLOC @GINKGO_HAVE_HWLOC@) set(GINKGO_HAVE_ROCTX @GINKGO_HAVE_ROCTX@) +# Ginkgo compiler information +set(GINKGO_CXX_COMPILER "@CMAKE_CXX_COMPILER@") +set(GINKGO_CXX_COMPILER_SHORT "@CMAKE_CXX_COMPILER_ID@:@CMAKE_CXX_COMPILER_VERSION@") +set(GINKGO_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@") +set(GINKGO_CUDA_COMPILER_SHORT "@CMAKE_CUDA_COMPILER_ID@:@CMAKE_CUDA_COMPILER_VERSION@") +set(GINKGO_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@") +set(GINKGO_CUDA_HOST_COMPILER_SHORT "") # dummy value to stay consistent +set(GINKGO_HIP_COMPILER "@HIP_HIPCC@") + # Ginkgo installation configuration set(GINKGO_INSTALL_PREFIX "@PACKAGE_CMAKE_INSTALL_PREFIX@") set(GINKGO_INSTALL_INCLUDE_DIR 
"@PACKAGE_CMAKE_INSTALL_FULL_INCLUDEDIR@") @@ -107,7 +115,6 @@ if(GINKGO_BUILD_HIP) endif() list(APPEND CMAKE_PREFIX_PATH "${GINKGO_INSTALL_PREFIX}") - set(GINKGO_INTERFACE_LINK_LIBRARIES "@GINKGO_INTERFACE_LINK_LIBRARIES@") set(GINKGO_INTERFACE_LINK_FLAGS "@GINKGO_INTERFACE_LINK_FLAGS@") set(GINKGO_INTERFACE_CXX_FLAGS "@GINKGO_INTERFACE_CXX_FLAGS@") @@ -207,4 +214,26 @@ if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_HAVE_TAU) find_package(PerfStubs REQUIRED) endif() +# Check that the same compilers as for Ginkgo are used +function(_ginkgo_check_compiler lang) + if(NOT ${CMAKE_${lang}_COMPILER} STREQUAL ${GINKGO_${lang}_COMPILER}) + set(_compiler_short "${CMAKE_${lang}_COMPILER_ID}:${CMAKE_${lang}_COMPILER_VERSION}") + if(NOT _compiler_short STREQUAL ${GINKGO_${lang}_COMPILER_SHORT}) + message(WARNING "The currently used ${lang} compiler: ${CMAKE_${lang}_COMPILER} does not match the compiler used to " + "build Ginkgo: ${GINKGO_${lang}_COMPILER}. It is encouraged to use the same compiler as Ginkgo to prevent ABI mismatch.") + endif() + endif() +endfunction() +_ginkgo_check_compiler(CXX) +if(GINKGO_BUILD_CUDA) + _ginkgo_check_compiler(CUDA) +endif() +if(GINKGO_BUILD_HIP) + _ginkgo_check_compiler(HIP) + if(NOT HIP_HIPCC STREQUAL ${GINKGO_HIP_COMPILER}) + message(WARNING "The currently used HIP compiler: ${HIP_HIPCC} does not match the compiler used to " + "build Ginkgo: ${GINKGO_HIP_COMPILER}. It is encouraged to use the same compiler as Ginkgo to prevent ABI mismatch.") + endif() +endif() + include(${CMAKE_CURRENT_LIST_DIR}/GinkgoTargets.cmake) From d40aca8108b0610a760acb61ab7161e34de58838 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 10:29:11 +0200 Subject: [PATCH 088/583] don't check hip compiler until cmake update --- cmake/GinkgoConfig.cmake.in | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 5194d76b5af..ea251a64b86 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -97,7 +97,6 @@ set(GINKGO_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@") set(GINKGO_CUDA_COMPILER_SHORT "@CMAKE_CUDA_COMPILER_ID@:@CMAKE_CUDA_COMPILER_VERSION@") set(GINKGO_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@") set(GINKGO_CUDA_HOST_COMPILER_SHORT "") # dummy value to stay consistent -set(GINKGO_HIP_COMPILER "@HIP_HIPCC@") # Ginkgo installation configuration set(GINKGO_INSTALL_PREFIX "@PACKAGE_CMAKE_INSTALL_PREFIX@") @@ -228,12 +227,5 @@ _ginkgo_check_compiler(CXX) if(GINKGO_BUILD_CUDA) _ginkgo_check_compiler(CUDA) endif() -if(GINKGO_BUILD_HIP) - _ginkgo_check_compiler(HIP) - if(NOT HIP_HIPCC STREQUAL ${GINKGO_HIP_COMPILER}) - message(WARNING "The currently used HIP compiler: ${HIP_HIPCC} does not match the compiler used to " - "build Ginkgo: ${GINKGO_HIP_COMPILER}. 
It is encouraged to use the same compiler as Ginkgo to prevent ABI mismatch.") - endif() -endif() include(${CMAKE_CURRENT_LIST_DIR}/GinkgoTargets.cmake) From 00d5cf6519730688c2d036af481aa5d7e9bc3306 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 10:29:23 +0200 Subject: [PATCH 089/583] also check cuda host compiler --- cmake/GinkgoConfig.cmake.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index ea251a64b86..fe2ac05d7e5 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -217,7 +217,7 @@ endif() function(_ginkgo_check_compiler lang) if(NOT ${CMAKE_${lang}_COMPILER} STREQUAL ${GINKGO_${lang}_COMPILER}) set(_compiler_short "${CMAKE_${lang}_COMPILER_ID}:${CMAKE_${lang}_COMPILER_VERSION}") - if(NOT _compiler_short STREQUAL ${GINKGO_${lang}_COMPILER_SHORT}) + if(NOT _compiler_short STREQUAL "${GINKGO_${lang}_COMPILER_SHORT}") message(WARNING "The currently used ${lang} compiler: ${CMAKE_${lang}_COMPILER} does not match the compiler used to " "build Ginkgo: ${GINKGO_${lang}_COMPILER}. It is encouraged to use the same compiler as Ginkgo to prevent ABI mismatch.") endif() @@ -226,6 +226,7 @@ endfunction() _ginkgo_check_compiler(CXX) if(GINKGO_BUILD_CUDA) _ginkgo_check_compiler(CUDA) + _ginkgo_check_compiler(CUDA_HOST) endif() include(${CMAKE_CURRENT_LIST_DIR}/GinkgoTargets.cmake) From e9b9f68bb8d89148a2f83c542b69a738b03cdaa4 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 31 Jul 2023 16:51:33 +0200 Subject: [PATCH 090/583] messy approach to splitting up files --- CMakeLists.txt | 2 + cmake/template_instantiation.cmake | 60 ++++++++++ common/CMakeLists.txt | 34 +----- common/unified/CMakeLists.txt | 34 ++++++ .../matrix/dense_kernels.instantiate.cpp | 108 ++++++++++++++++++ .../{dense_kernels.cpp => dense_kernels.tpp} | 83 -------------- omp/CMakeLists.txt | 1 + 7 files changed, 208 insertions(+), 114 deletions(-) create mode 100644 cmake/template_instantiation.cmake create mode 100644 common/unified/CMakeLists.txt create mode 100644 common/unified/matrix/dense_kernels.instantiate.cpp rename common/unified/matrix/{dense_kernels.cpp => dense_kernels.tpp} (87%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6351ce98bfa..809c39991bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -304,6 +304,8 @@ configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in # propagated to the other parts of Ginkgo in case of building as static libraries add_subdirectory(devices) # Basic device functionalities. Always compiled. 
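# The files listed in GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES are generated at build time
# by add_instantiation_files() (cmake/template_instantiation.cmake), which is why they are
# marked as GENERATED below before the backend directories compile them; the message()
# call below is presumably only a temporary debug print.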
add_subdirectory(common) # Import list of unified kernel source files +set_source_files_properties(${GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES} PROPERTIES GENERATED 1) +message("${GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES}") if(GINKGO_BUILD_CUDA) add_subdirectory(cuda) # High-performance kernels for NVIDIA GPUs endif() diff --git a/cmake/template_instantiation.cmake b/cmake/template_instantiation.cmake new file mode 100644 index 00000000000..af5c395279c --- /dev/null +++ b/cmake/template_instantiation.cmake @@ -0,0 +1,60 @@ +cmake_minimum_required(VERSION 3.13) +function(add_instantiation_files source_file output_files_var) + file(READ "${source_file}" file_contents) + string(REPLACE ";" "" file_contents "${file_contents}") + string(REGEX REPLACE "[\r\n]" ";" file_contents "${file_contents}") + set(begin_location) + set(end_location) + set(split_locations) + list(LENGTH file_contents total_length) + set(counter 0) + foreach(line IN LISTS file_contents) + if(line MATCHES "// begin") + set(begin_location ${counter}) + elseif(line MATCHES "// split") + list(APPEND split_locations ${counter}) + elseif(line MATCHES "// end") + set(end_location ${counter}) + endif() + math(EXPR counter "${counter} + 1") + endforeach() + if (NOT (begin_location AND end_location AND split_locations)) + message(FATAL_ERROR "Nothing to split") + endif() + if (begin_location GREATER_EQUAL end_location) + message(FATAL_ERROR "Incorrect begin/end order") + endif() + set(range_begins ${begin_location} ${split_locations}) + set(range_ends ${split_locations} ${end_location}) + list(LENGTH begin_locations range_count) + list(LENGTH split_locations range_count_minus_one) + math(EXPR length_header "${begin_location}") + math(EXPR end_location_past "${end_location} + 1") + math(EXPR length_footer "${total_length} - ${end_location_past}") + list(SUBLIST file_contents 0 ${length_header} header) + list(SUBLIST file_contents ${end_location_past} ${length_footer} footer) + set(output_files) + foreach(range RANGE 0 ${range_count_minus_one}) + set(filename "${source_file}.${range}.cpp") + list(APPEND output_files "${filename}") + list(GET range_begins ${range} begin) + list(GET range_ends ${range} end) + math(EXPR begin "${begin} + 1") + math(EXPR length "${end} - ${begin}") + list(SUBLIST file_contents ${begin} ${length} content) + string(REPLACE ";" "\n" content "${header};${content};${footer}") + string(REPLACE "" ";" content "${content}") + # create a .tmp file, but only copy it over if source file changed + # this way, we don't rebuild unnecessarily + file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/${filename}.tmp" "${content}") + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${filename}" + COMMAND ${CMAKE_COMMAND} + -E copy "${CMAKE_CURRENT_BINARY_DIR}/${filename}.tmp" + "${CMAKE_CURRENT_BINARY_DIR}/${filename}" + MAIN_DEPENDENCY "${source_file}") + endforeach() + # lazy workaround to make cmake generation depend on the source file + configure_file("${source_file}", "${source_file}.tmp" COPYONLY) + set(${output_files_var} ${output_files} PARENT_SCOPE) +endfunction() diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 3a7cb1ceb15..8512e05d07a 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -1,31 +1,3 @@ -set(UNIFIED_SOURCES - base/device_matrix_data_kernels.cpp - base/index_set_kernels.cpp - components/absolute_array_kernels.cpp - components/fill_array_kernels.cpp - components/format_conversion_kernels.cpp - components/precision_conversion_kernels.cpp - components/reduce_array_kernels.cpp - 
distributed/partition_kernels.cpp - matrix/coo_kernels.cpp - matrix/csr_kernels.cpp - matrix/dense_kernels.cpp - matrix/ell_kernels.cpp - matrix/hybrid_kernels.cpp - matrix/sellp_kernels.cpp - matrix/sparsity_csr_kernels.cpp - matrix/diagonal_kernels.cpp - multigrid/pgm_kernels.cpp - preconditioner/jacobi_kernels.cpp - solver/bicg_kernels.cpp - solver/bicgstab_kernels.cpp - solver/cg_kernels.cpp - solver/cgs_kernels.cpp - solver/common_gmres_kernels.cpp - solver/fcg_kernels.cpp - solver/gcr_kernels.cpp - solver/gmres_kernels.cpp - solver/ir_kernels.cpp - ) -list(TRANSFORM UNIFIED_SOURCES PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/unified/) -set(GKO_UNIFIED_COMMON_SOURCES ${UNIFIED_SOURCES} PARENT_SCOPE) +add_subdirectory(unified) +set(GKO_UNIFIED_COMMON_SOURCES ${GKO_UNIFIED_COMMON_SOURCES} PARENT_SCOPE) +set(GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES ${GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES} PARENT_SCOPE) diff --git a/common/unified/CMakeLists.txt b/common/unified/CMakeLists.txt new file mode 100644 index 00000000000..a9f45c63f13 --- /dev/null +++ b/common/unified/CMakeLists.txt @@ -0,0 +1,34 @@ +include(../../cmake/template_instantiation.cmake) +add_instantiation_files(matrix/dense_kernels.instantiate.cpp UNIFIED_INSTANTIATE_SOURCES) +set(UNIFIED_SOURCES + base/device_matrix_data_kernels.cpp + base/index_set_kernels.cpp + components/absolute_array_kernels.cpp + components/fill_array_kernels.cpp + components/format_conversion_kernels.cpp + components/precision_conversion_kernels.cpp + components/reduce_array_kernels.cpp + distributed/partition_kernels.cpp + matrix/coo_kernels.cpp + matrix/csr_kernels.cpp + matrix/ell_kernels.cpp + matrix/hybrid_kernels.cpp + matrix/sellp_kernels.cpp + matrix/sparsity_csr_kernels.cpp + matrix/diagonal_kernels.cpp + multigrid/pgm_kernels.cpp + preconditioner/jacobi_kernels.cpp + solver/bicg_kernels.cpp + solver/bicgstab_kernels.cpp + solver/cg_kernels.cpp + solver/cgs_kernels.cpp + solver/common_gmres_kernels.cpp + solver/fcg_kernels.cpp + solver/gcr_kernels.cpp + solver/gmres_kernels.cpp + solver/ir_kernels.cpp + ) +list(TRANSFORM UNIFIED_SOURCES PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/) +list(TRANSFORM UNIFIED_INSTANTIATE_SOURCES PREPEND ${CMAKE_CURRENT_BINARY_DIR}/) +set(GKO_UNIFIED_COMMON_SOURCES ${UNIFIED_SOURCES} PARENT_SCOPE) +set(GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES ${UNIFIED_INSTANTIATE_SOURCES} PARENT_SCOPE) \ No newline at end of file diff --git a/common/unified/matrix/dense_kernels.instantiate.cpp b/common/unified/matrix/dense_kernels.instantiate.cpp new file mode 100644 index 00000000000..92d9fa26a00 --- /dev/null +++ b/common/unified/matrix/dense_kernels.instantiate.cpp @@ -0,0 +1,108 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "common/unified/matrix/dense_kernels.tpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +namespace dense { + + +// begin +GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( + GKO_DECLARE_DENSE_COPY_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_FILL_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE(GKO_DECLARE_DENSE_SCALE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( + GKO_DECLARE_DENSE_INV_SCALE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( + GKO_DECLARE_DENSE_ADD_SCALED_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( + GKO_DECLARE_DENSE_SUB_SCALED_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_DENSE_ROW_GATHER_KERNEL); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MAKE_COMPLEX_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_REAL_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_IMAG_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( + GKO_DECLARE_DENSE_ADD_SCALED_IDENTITY_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_MAX_NNZ_PER_ROW_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + 
GKO_DECLARE_DENSE_COMPUTE_SLICE_SETS_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL_SIZE_T); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL); +// end + + +} // namespace dense +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko \ No newline at end of file diff --git a/common/unified/matrix/dense_kernels.cpp b/common/unified/matrix/dense_kernels.tpp similarity index 87% rename from common/unified/matrix/dense_kernels.cpp rename to common/unified/matrix/dense_kernels.tpp index 18d2fbabe6c..b6ed5fb37e0 100644 --- a/common/unified/matrix/dense_kernels.cpp +++ b/common/unified/matrix/dense_kernels.tpp @@ -67,9 +67,6 @@ void copy(std::shared_ptr exec, input->get_size(), input, output); } -GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( - GKO_DECLARE_DENSE_COPY_KERNEL); - template void fill(std::shared_ptr exec, @@ -83,8 +80,6 @@ void fill(std::shared_ptr exec, mat->get_size(), mat, value); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_FILL_KERNEL); - template void fill_in_matrix_data(std::shared_ptr exec, @@ -100,9 +95,6 @@ void fill_in_matrix_data(std::shared_ptr exec, data.get_const_col_idxs(), data.get_const_values(), output); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL); - template void scale(std::shared_ptr exec, @@ -125,8 +117,6 @@ void scale(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE(GKO_DECLARE_DENSE_SCALE_KERNEL); - template void inv_scale(std::shared_ptr exec, @@ -150,9 +140,6 @@ void inv_scale(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( - GKO_DECLARE_DENSE_INV_SCALE_KERNEL); - template void add_scaled(std::shared_ptr exec, @@ -176,9 +163,6 @@ void add_scaled(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( - GKO_DECLARE_DENSE_ADD_SCALED_KERNEL); - template void sub_scaled(std::shared_ptr exec, @@ -202,9 +186,6 @@ void sub_scaled(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( - GKO_DECLARE_DENSE_SUB_SCALED_KERNEL); - template void add_scaled_diag(std::shared_ptr exec, @@ -221,8 +202,6 @@ void add_scaled_diag(std::shared_ptr exec, x->get_size()[0], alpha->get_const_values(), x->get_const_values(), y); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); - template void sub_scaled_diag(std::shared_ptr exec, @@ -239,8 +218,6 @@ void sub_scaled_diag(std::shared_ptr exec, x->get_size()[0], alpha->get_const_values(), x->get_const_values(), y); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL); - template void compute_dot(std::shared_ptr exec, @@ -257,8 +234,6 @@ void compute_dot(std::shared_ptr exec, tmp, x, y); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); - template void compute_conj_dot(std::shared_ptr exec, @@ -275,8 +250,6 @@ void compute_conj_dot(std::shared_ptr exec, tmp, x, y); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); - template void compute_norm2(std::shared_ptr exec, @@ -292,8 +265,6 @@ void compute_norm2(std::shared_ptr exec, result->get_values(), x->get_size(), tmp, x); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); - template void compute_norm1(std::shared_ptr exec, const matrix::Dense* x, @@ -306,8 
+277,6 @@ void compute_norm1(std::shared_ptr exec, x->get_size(), tmp, x); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); - template void compute_max_nnz_per_row(std::shared_ptr exec, @@ -325,9 +294,6 @@ void compute_max_nnz_per_row(std::shared_ptr exec, source->get_size()[0]); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DENSE_COMPUTE_MAX_NNZ_PER_ROW_KERNEL); - template void compute_slice_sets(std::shared_ptr exec, @@ -357,9 +323,6 @@ void compute_slice_sets(std::shared_ptr exec, components::prefix_sum_nonnegative(exec, slice_sets, num_slices + 1); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DENSE_COMPUTE_SLICE_SETS_KERNEL); - template void count_nonzeros_per_row(std::shared_ptr exec, @@ -374,11 +337,6 @@ void count_nonzeros_per_row(std::shared_ptr exec, GKO_KERNEL_REDUCE_SUM(IndexType), result, 1, mtx->get_size(), mtx); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL); -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL_SIZE_T); - template void compute_squared_norm2(std::shared_ptr exec, @@ -393,9 +351,6 @@ void compute_squared_norm2(std::shared_ptr exec, x->get_size(), tmp, x); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL); - template void compute_sqrt(std::shared_ptr exec, @@ -409,8 +364,6 @@ void compute_sqrt(std::shared_ptr exec, x->get_size(), x); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL); - template void symm_permute(std::shared_ptr exec, @@ -426,9 +379,6 @@ void symm_permute(std::shared_ptr exec, orig->get_size(), orig, *permutation_indices, permuted); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); - template void inv_symm_permute(std::shared_ptr exec, @@ -444,9 +394,6 @@ void inv_symm_permute(std::shared_ptr exec, orig->get_size(), orig, *permutation_indices, permuted); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); - template void row_gather(std::shared_ptr exec, @@ -463,9 +410,6 @@ void row_gather(std::shared_ptr exec, row_collection); } -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( - GKO_DECLARE_DENSE_ROW_GATHER_KERNEL); - template void advanced_row_gather(std::shared_ptr exec, @@ -490,9 +434,6 @@ void advanced_row_gather(std::shared_ptr exec, row_collection); } -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( - GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL); - template void column_permute(std::shared_ptr exec, @@ -508,9 +449,6 @@ void column_permute(std::shared_ptr exec, orig->get_size(), orig, *permutation_indices, column_permuted); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL); - template void inverse_row_permute(std::shared_ptr exec, @@ -526,9 +464,6 @@ void inverse_row_permute(std::shared_ptr exec, orig->get_size(), orig, *permutation_indices, row_permuted); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL); - template void inverse_column_permute(std::shared_ptr exec, @@ -544,9 +479,6 @@ void inverse_column_permute(std::shared_ptr exec, orig->get_size(), orig, *permutation_indices, column_permuted); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL); - template void extract_diagonal(std::shared_ptr exec, @@ -559,8 +491,6 @@ void extract_diagonal(std::shared_ptr exec, diag->get_size()[0], orig, diag->get_values()); 
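    // the explicit instantiation that used to follow this kernel definition now lives in
    // dense_kernels.instantiate.cpp together with the other instantiation macros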
} -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL); - template void inplace_absolute_dense(std::shared_ptr exec, @@ -574,8 +504,6 @@ void inplace_absolute_dense(std::shared_ptr exec, source->get_size(), source); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL); - template void outplace_absolute_dense(std::shared_ptr exec, @@ -590,8 +518,6 @@ void outplace_absolute_dense(std::shared_ptr exec, source->get_size(), source, result); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL); - template void make_complex(std::shared_ptr exec, @@ -606,8 +532,6 @@ void make_complex(std::shared_ptr exec, source->get_size(), source, result); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MAKE_COMPLEX_KERNEL); - template void get_real(std::shared_ptr exec, @@ -622,8 +546,6 @@ void get_real(std::shared_ptr exec, source->get_size(), source, result); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_REAL_KERNEL); - template void get_imag(std::shared_ptr exec, @@ -638,8 +560,6 @@ void get_imag(std::shared_ptr exec, source->get_size(), source, result); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_IMAG_KERNEL); - template void add_scaled_identity(std::shared_ptr exec, @@ -659,9 +579,6 @@ void add_scaled_identity(std::shared_ptr exec, mtx); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( - GKO_DECLARE_DENSE_ADD_SCALED_IDENTITY_KERNEL); - } // namespace dense } // namespace GKO_DEVICE_NAMESPACE diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 6499e3b49d4..74e5e5b8806 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -39,6 +39,7 @@ target_sources(ginkgo_omp stop/criterion_kernels.cpp stop/residual_norm_kernels.cpp ${GKO_UNIFIED_COMMON_SOURCES} + ${GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES} ) ginkgo_compile_features(ginkgo_omp) From 72367a856a612d29e988bada2592646020ed7597 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 31 Jul 2023 19:27:28 +0200 Subject: [PATCH 091/583] clean up dependency structure --- CMakeLists.txt | 1 - cmake/template_instantiation.cmake | 34 ++++++++++++++++++------------ common/CMakeLists.txt | 1 - common/unified/CMakeLists.txt | 6 +----- dpcpp/CMakeLists.txt | 3 +++ omp/CMakeLists.txt | 2 +- 6 files changed, 26 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 809c39991bb..9e625892c3d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -304,7 +304,6 @@ configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in # propagated to the other parts of Ginkgo in case of building as static libraries add_subdirectory(devices) # Basic device functionalities. Always compiled. 
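# The OpenMP backend now compiles the full instantiate.cpp directly and the DPC++ backend
# generates its split copies via add_instantiation_files() in its own directory (see the
# dpcpp/CMakeLists.txt hunk below), so the GENERATED property is no longer set at the top
# level.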
add_subdirectory(common) # Import list of unified kernel source files -set_source_files_properties(${GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES} PROPERTIES GENERATED 1) message("${GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES}") if(GINKGO_BUILD_CUDA) add_subdirectory(cuda) # High-performance kernels for NVIDIA GPUs diff --git a/cmake/template_instantiation.cmake b/cmake/template_instantiation.cmake index af5c395279c..bc37d895537 100644 --- a/cmake/template_instantiation.cmake +++ b/cmake/template_instantiation.cmake @@ -1,8 +1,11 @@ -cmake_minimum_required(VERSION 3.13) -function(add_instantiation_files source_file output_files_var) - file(READ "${source_file}" file_contents) +function(add_instantiation_files source_dir source_file output_files_var) + # read full file into variable + set(source_path "${source_dir}/${source_file}") + file(READ "${source_path}" file_contents) + # escape semicolons and use them for line separation string(REPLACE ";" "" file_contents "${file_contents}") string(REGEX REPLACE "[\r\n]" ";" file_contents "${file_contents}") + # find location of // begin|split|end comments set(begin_location) set(end_location) set(split_locations) @@ -24,6 +27,7 @@ function(add_instantiation_files source_file output_files_var) if (begin_location GREATER_EQUAL end_location) message(FATAL_ERROR "Incorrect begin/end order") endif() + # determine which lines belong to the header and footer set(range_begins ${begin_location} ${split_locations}) set(range_ends ${split_locations} ${end_location}) list(LENGTH begin_locations range_count) @@ -34,27 +38,31 @@ function(add_instantiation_files source_file output_files_var) list(SUBLIST file_contents 0 ${length_header} header) list(SUBLIST file_contents ${end_location_past} ${length_footer} footer) set(output_files) + # for each range between // begin|split|end pairs foreach(range RANGE 0 ${range_count_minus_one}) - set(filename "${source_file}.${range}.cpp") - list(APPEND output_files "${filename}") + # create an output filename + string(REGEX REPLACE "(\.hip\.cpp|\.dp\.cpp|\.cpp|\.cu)$" ".${range}\\1" target_file "${source_file}") + set(target_path "${CMAKE_CURRENT_BINARY_DIR}/${target_file}") + list(APPEND output_files "${target_path}") + # extract the range between the comments list(GET range_begins ${range} begin) list(GET range_ends ${range} end) math(EXPR begin "${begin} + 1") math(EXPR length "${end} - ${begin}") list(SUBLIST file_contents ${begin} ${length} content) + # concatenate header, content and footer and turn semicolons into newlines string(REPLACE ";" "\n" content "${header};${content};${footer}") + # and escaped semicolons into regular semicolons again string(REPLACE "" ";" content "${content}") # create a .tmp file, but only copy it over if source file changed # this way, we don't rebuild unnecessarily - file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/${filename}.tmp" "${content}") + file(WRITE "${target_path}.tmp" "${content}") add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${filename}" - COMMAND ${CMAKE_COMMAND} - -E copy "${CMAKE_CURRENT_BINARY_DIR}/${filename}.tmp" - "${CMAKE_CURRENT_BINARY_DIR}/${filename}" - MAIN_DEPENDENCY "${source_file}") + OUTPUT "${target_path}" + COMMAND ${CMAKE_COMMAND} -E copy "${target_path}.tmp" "${target_path}" + MAIN_DEPENDENCY "${source_path}") endforeach() - # lazy workaround to make cmake generation depend on the source file - configure_file("${source_file}", "${source_file}.tmp" COPYONLY) + # make sure cmake gets called when the source file was updated + set_property(DIRECTORY APPEND PROPERTY 
CMAKE_CONFIGURE_DEPENDS "${source_path}") set(${output_files_var} ${output_files} PARENT_SCOPE) endfunction() diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 8512e05d07a..77bdd7230b9 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -1,3 +1,2 @@ add_subdirectory(unified) set(GKO_UNIFIED_COMMON_SOURCES ${GKO_UNIFIED_COMMON_SOURCES} PARENT_SCOPE) -set(GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES ${GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES} PARENT_SCOPE) diff --git a/common/unified/CMakeLists.txt b/common/unified/CMakeLists.txt index a9f45c63f13..5a37eb022f9 100644 --- a/common/unified/CMakeLists.txt +++ b/common/unified/CMakeLists.txt @@ -1,5 +1,3 @@ -include(../../cmake/template_instantiation.cmake) -add_instantiation_files(matrix/dense_kernels.instantiate.cpp UNIFIED_INSTANTIATE_SOURCES) set(UNIFIED_SOURCES base/device_matrix_data_kernels.cpp base/index_set_kernels.cpp @@ -29,6 +27,4 @@ set(UNIFIED_SOURCES solver/ir_kernels.cpp ) list(TRANSFORM UNIFIED_SOURCES PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/) -list(TRANSFORM UNIFIED_INSTANTIATE_SOURCES PREPEND ${CMAKE_CURRENT_BINARY_DIR}/) -set(GKO_UNIFIED_COMMON_SOURCES ${UNIFIED_SOURCES} PARENT_SCOPE) -set(GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES ${UNIFIED_INSTANTIATE_SOURCES} PARENT_SCOPE) \ No newline at end of file +set(GKO_UNIFIED_COMMON_SOURCES ${UNIFIED_SOURCES} PARENT_SCOPE) \ No newline at end of file diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 31b5e0543ba..b33b63d4af9 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -3,6 +3,8 @@ set(GINKGO_MKL_ROOT "${MKL_ROOT}" PARENT_SCOPE) find_package(oneDPL REQUIRED HINTS "$ENV{DPL_ROOT}") set(GINKGO_DPL_ROOT "${DPL_ROOT}" PARENT_SCOPE) +include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) +add_instantiation_files(${PROJECT_SOURCE_DIR}/common/unified matrix/dense_kernels.instantiate.cpp DENSE_INSTANTIATE) add_library(ginkgo_dpcpp $ "") target_sources(ginkgo_dpcpp PRIVATE @@ -55,6 +57,7 @@ target_sources(ginkgo_dpcpp stop/criterion_kernels.dp.cpp stop/residual_norm_kernels.dp.cpp ${GKO_UNIFIED_COMMON_SOURCES} + ${DENSE_INSTANTIATE} ) # TODO: adjust it when dpcpp jacobi supports more block size diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 74e5e5b8806..50f46cd23cd 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -39,7 +39,7 @@ target_sources(ginkgo_omp stop/criterion_kernels.cpp stop/residual_norm_kernels.cpp ${GKO_UNIFIED_COMMON_SOURCES} - ${GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES} + ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp ) ginkgo_compile_features(ginkgo_omp) From 08202a03b48b67467aa33e6a5f8d6c531b4ac5e3 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 31 Jul 2023 20:07:37 +0000 Subject: [PATCH 092/583] split CUDA and HIP (fb)csr_kernels compilation --- common/cuda_hip/matrix/csr_kernels.hpp.inc | 5 - common/cuda_hip/matrix/fbcsr_kernels.hpp.inc | 18 ---- cuda/CMakeLists.txt | 9 +- cuda/matrix/csr_kernels.instantiate.cu | 99 +++++++++++++++++++ ...csr_kernels.cu => csr_kernels.template.cu} | 56 ----------- cuda/matrix/fbcsr_kernels.instantiate.cu | 75 ++++++++++++++ ...r_kernels.cu => fbcsr_kernels.template.cu} | 11 --- hip/CMakeLists.txt | 9 +- hip/matrix/csr_kernels.instantiate.hip.cpp | 99 +++++++++++++++++++ ...s.hip.cpp => csr_kernels.template.hip.cpp} | 56 ----------- hip/matrix/fbcsr_kernels.instantiate.hip.cpp | 75 ++++++++++++++ ...hip.cpp => fbcsr_kernels.template.hip.cpp} | 11 --- omp/CMakeLists.txt | 3 +- 13 files changed, 364 insertions(+), 162 deletions(-) 
create mode 100644 cuda/matrix/csr_kernels.instantiate.cu rename cuda/matrix/{csr_kernels.cu => csr_kernels.template.cu} (96%) create mode 100644 cuda/matrix/fbcsr_kernels.instantiate.cu rename cuda/matrix/{fbcsr_kernels.cu => fbcsr_kernels.template.cu} (97%) create mode 100644 hip/matrix/csr_kernels.instantiate.hip.cpp rename hip/matrix/{csr_kernels.hip.cpp => csr_kernels.template.hip.cpp} (96%) create mode 100644 hip/matrix/fbcsr_kernels.instantiate.hip.cpp rename hip/matrix/{fbcsr_kernels.hip.cpp => fbcsr_kernels.template.hip.cpp} (96%) diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 1fca1ee7215..c370075c8a8 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -937,9 +937,6 @@ void convert_to_fbcsr(std::shared_ptr exec, }); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL); - namespace kernel { @@ -1122,8 +1119,6 @@ void build_lookup(std::shared_ptr exec, storage); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL); - template void fallback_transpose(std::shared_ptr exec, diff --git a/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc b/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc index 27314c06a59..d71d593b0a2 100644 --- a/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc @@ -238,9 +238,6 @@ void fill_in_matrix_data(std::shared_ptr exec, }); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_FILL_IN_MATRIX_DATA_KERNEL); - namespace kernel { @@ -323,9 +320,6 @@ void fill_in_dense(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_FILL_IN_DENSE_KERNEL); - template void convert_to_csr(const std::shared_ptr exec, @@ -345,9 +339,6 @@ void convert_to_csr(const std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); - template void is_sorted_by_column_index( @@ -372,23 +363,14 @@ void is_sorted_by_column_index( *is_sorted = exec->copy_val_to_host(gpu_array.get_data()); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); - template void sort_by_column_index(const std::shared_ptr exec, matrix::Fbcsr* const to_sort) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); - template void extract_diagonal(std::shared_ptr exec, const matrix::Fbcsr* orig, matrix::Diagonal* diag) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index aecf4e1c2f2..6cfb83a59e8 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -1,4 +1,9 @@ add_library(ginkgo_cuda $ "") +include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) +add_instantiation_files(. matrix/csr_kernels.instantiate.cu CSR_INSTANTIATE) +add_instantiation_files(. 
matrix/fbcsr_kernels.instantiate.cu FBCSR_INSTANTIATE) +# we don't split up the dense kernels into distinct compliations +list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) target_sources(ginkgo_cuda PRIVATE base/device.cpp @@ -31,11 +36,11 @@ target_sources(ginkgo_cuda factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu matrix/coo_kernels.cu - matrix/csr_kernels.cu + ${CSR_INSTANTIATE} matrix/dense_kernels.cu matrix/diagonal_kernels.cu matrix/ell_kernels.cu - matrix/fbcsr_kernels.cu + ${FBCSR_INSTANTIATE} matrix/fft_kernels.cu matrix/sellp_kernels.cu matrix/sparsity_csr_kernels.cu diff --git a/cuda/matrix/csr_kernels.instantiate.cu b/cuda/matrix/csr_kernels.instantiate.cu new file mode 100644 index 00000000000..75747bf074b --- /dev/null +++ b/cuda/matrix/csr_kernels.instantiate.cu @@ -0,0 +1,99 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "cuda/matrix/csr_kernels.template.cu" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The Compressed sparse row matrix format namespace. 
+ * + * @ingroup csr + */ +namespace csr { + + +// begin +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); +// end + + +} // namespace csr +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.template.cu similarity index 96% rename from cuda/matrix/csr_kernels.cu rename to cuda/matrix/csr_kernels.template.cu index 619ead5bbbb..1b4b20a1e75 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.template.cu @@ -533,9 +533,6 @@ void spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_SPMV_KERNEL); - template @@ -598,9 +595,6 @@ void advanced_spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); - template void spgemm(std::shared_ptr exec, @@ -724,8 +718,6 @@ void spgemm(std::shared_ptr exec, #endif // CUDA_VERSION >= 11000 } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); - namespace { @@ -920,9 +912,6 @@ void advanced_spgemm(std::shared_ptr exec, #endif // CUDA_VERSION >= 11000 } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL); - template void spgeam(std::shared_ptr exec, @@ -948,8 +937,6 @@ void spgeam(std::shared_ptr exec, b->get_const_col_idxs(), b->get_const_values(), c); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); - template void fill_in_dense(std::shared_ptr exec, @@ -972,9 +959,6 @@ void 
fill_in_dense(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL); - template void transpose(std::shared_ptr exec, @@ -1024,8 +1008,6 @@ void transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); - template void conj_transpose(std::shared_ptr exec, @@ -1083,9 +1065,6 @@ void conj_transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); - template void inv_symm_permute(std::shared_ptr exec, @@ -1116,9 +1095,6 @@ void inv_symm_permute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); - template void row_permute(std::shared_ptr exec, @@ -1149,9 +1125,6 @@ void row_permute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); - template void inverse_row_permute(std::shared_ptr exec, @@ -1182,9 +1155,6 @@ void inverse_row_permute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); - template void calculate_nonzeros_per_row_in_span( @@ -1204,9 +1174,6 @@ void calculate_nonzeros_per_row_in_span( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); - template void compute_submatrix(std::shared_ptr exec, @@ -1233,9 +1200,6 @@ void compute_submatrix(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); - template void calculate_nonzeros_per_row_in_index_set( @@ -1245,9 +1209,6 @@ void calculate_nonzeros_per_row_in_index_set( const gko::index_set& col_index_set, IndexType* row_nnz) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); - template void compute_submatrix_from_index_set( @@ -1257,9 +1218,6 @@ void compute_submatrix_from_index_set( const gko::index_set& col_index_set, matrix::Csr* result) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); - template void sort_by_column_index(std::shared_ptr exec, @@ -1312,9 +1270,6 @@ void sort_by_column_index(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); - template void is_sorted_by_column_index( @@ -1336,9 +1291,6 @@ void is_sorted_by_column_index( cpu_array = gpu_array; } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); - template void extract_diagonal(std::shared_ptr exec, @@ -1364,8 +1316,6 @@ void extract_diagonal(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); - template void check_diagonal_entries_exist( @@ -1389,9 +1339,6 @@ void check_diagonal_entries_exist( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST); - template void add_scaled_identity(std::shared_ptr exec, @@ -1413,9 +1360,6 @@ void add_scaled_identity(std::shared_ptr exec, as_device_type(mtx->get_values())); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); - } // namespace csr } // namespace cuda diff --git a/cuda/matrix/fbcsr_kernels.instantiate.cu b/cuda/matrix/fbcsr_kernels.instantiate.cu new file mode 100644 index 00000000000..73c3fc136ba --- /dev/null +++ 
b/cuda/matrix/fbcsr_kernels.instantiate.cu @@ -0,0 +1,75 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "cuda/matrix/fbcsr_kernels.template.cu" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. 
+ * + * @ingroup fbcsr + */ +namespace fbcsr { + + +// begin +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_FILL_IN_MATRIX_DATA_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_FILL_IN_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); +// end + + +} // namespace fbcsr +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.template.cu similarity index 97% rename from cuda/matrix/fbcsr_kernels.cu rename to cuda/matrix/fbcsr_kernels.template.cu index 8160a0ac5a5..c629b292bfb 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.template.cu @@ -180,8 +180,6 @@ void spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); - template void advanced_spmv(std::shared_ptr exec, @@ -240,9 +238,6 @@ void advanced_spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); - namespace { @@ -305,9 +300,6 @@ void transpose(const std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); - template void conj_transpose(std::shared_ptr exec, @@ -325,9 +317,6 @@ void conj_transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); - } // namespace fbcsr } // namespace cuda diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 61b06ad4058..7e0558844cf 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,3 +1,8 @@ +include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) +add_instantiation_files(. matrix/csr_kernels.instantiate.hip.cpp CSR_INSTANTIATE) +add_instantiation_files(. 
matrix/fbcsr_kernels.instantiate.hip.cpp FBCSR_INSTANTIATE) +# we don't split up the dense kernels into distinct compliations +list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) set(GINKGO_HIP_SOURCES base/device.hip.cpp base/device_matrix_data_kernels.hip.cpp @@ -29,11 +34,11 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp matrix/coo_kernels.hip.cpp - matrix/csr_kernels.hip.cpp + ${CSR_INSTANTIATE} matrix/dense_kernels.hip.cpp matrix/diagonal_kernels.hip.cpp matrix/ell_kernels.hip.cpp - matrix/fbcsr_kernels.hip.cpp + ${FBCSR_INSTANTIATE} matrix/sellp_kernels.hip.cpp matrix/sparsity_csr_kernels.hip.cpp multigrid/pgm_kernels.hip.cpp diff --git a/hip/matrix/csr_kernels.instantiate.hip.cpp b/hip/matrix/csr_kernels.instantiate.hip.cpp new file mode 100644 index 00000000000..498f3ec1795 --- /dev/null +++ b/hip/matrix/csr_kernels.instantiate.hip.cpp @@ -0,0 +1,99 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "hip/matrix/csr_kernels.template.hip.cpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The Compressed sparse row matrix format namespace. 
+ * + * @ingroup csr + */ +namespace csr { + + +// begin +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); +// end + + +} // namespace csr +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.template.hip.cpp similarity index 96% rename from hip/matrix/csr_kernels.hip.cpp rename to hip/matrix/csr_kernels.template.hip.cpp index b18cfa0f12b..e6a4fb64041 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.template.hip.cpp @@ -493,9 +493,6 @@ void spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_SPMV_KERNEL); - template @@ -558,9 +555,6 @@ void advanced_spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); - template void spgemm(std::shared_ptr exec, @@ -634,8 +628,6 @@ void spgemm(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); - namespace { @@ -775,9 +767,6 @@ void advanced_spgemm(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL); - template void spgeam(std::shared_ptr exec, @@ -803,8 +792,6 @@ void spgeam(std::shared_ptr exec, b->get_const_col_idxs(), b->get_const_values(), c); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); - template void fill_in_dense(std::shared_ptr exec, @@ -827,9 +814,6 @@ void fill_in_dense(std::shared_ptr exec, } } 
-GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL); - template void transpose(std::shared_ptr exec, @@ -854,8 +838,6 @@ void transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); - template void conj_transpose(std::shared_ptr exec, @@ -888,9 +870,6 @@ void conj_transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); - template void inv_symm_permute(std::shared_ptr exec, @@ -921,9 +900,6 @@ void inv_symm_permute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); - template void row_permute(std::shared_ptr exec, const IndexType* perm, @@ -953,9 +929,6 @@ void row_permute(std::shared_ptr exec, const IndexType* perm, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); - template void inverse_row_permute(std::shared_ptr exec, @@ -986,9 +959,6 @@ void inverse_row_permute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); - template void calculate_nonzeros_per_row_in_span( @@ -1009,9 +979,6 @@ void calculate_nonzeros_per_row_in_span( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); - template void compute_submatrix(std::shared_ptr exec, @@ -1038,9 +1005,6 @@ void compute_submatrix(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); - template void calculate_nonzeros_per_row_in_index_set( @@ -1050,9 +1014,6 @@ void calculate_nonzeros_per_row_in_index_set( const gko::index_set& col_index_set, IndexType* row_nnz) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); - template void compute_submatrix_from_index_set( @@ -1062,9 +1023,6 @@ void compute_submatrix_from_index_set( const gko::index_set& col_index_set, matrix::Csr* result) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); - template void sort_by_column_index(std::shared_ptr exec, @@ -1110,9 +1068,6 @@ void sort_by_column_index(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); - template void is_sorted_by_column_index( @@ -1134,9 +1089,6 @@ void is_sorted_by_column_index( cpu_array = gpu_array; } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); - template void extract_diagonal(std::shared_ptr exec, @@ -1161,8 +1113,6 @@ void extract_diagonal(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); - template void check_diagonal_entries_exist( @@ -1186,9 +1136,6 @@ void check_diagonal_entries_exist( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST); - template void add_scaled_identity(std::shared_ptr exec, @@ -1210,9 +1157,6 @@ void add_scaled_identity(std::shared_ptr exec, as_device_type(mtx->get_values())); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); - } // namespace csr } // namespace hip diff --git a/hip/matrix/fbcsr_kernels.instantiate.hip.cpp b/hip/matrix/fbcsr_kernels.instantiate.hip.cpp new file mode 100644 index 00000000000..8cf4944e08a --- /dev/null +++ 
b/hip/matrix/fbcsr_kernels.instantiate.hip.cpp @@ -0,0 +1,75 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "hip/matrix/fbcsr_kernels.template.hip.cpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. 
+ * + * @ingroup fbcsr + */ +namespace fbcsr { + + +// begin +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_FILL_IN_MATRIX_DATA_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_FILL_IN_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); +// end + + +} // namespace fbcsr +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.template.hip.cpp similarity index 96% rename from hip/matrix/fbcsr_kernels.hip.cpp rename to hip/matrix/fbcsr_kernels.template.hip.cpp index 8a4d78e7e40..88cad66753c 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.template.hip.cpp @@ -182,8 +182,6 @@ void spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); - template void advanced_spmv(std::shared_ptr exec, @@ -242,9 +240,6 @@ void advanced_spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); - template void transpose(const std::shared_ptr exec, @@ -254,9 +249,6 @@ void transpose(const std::shared_ptr exec, fallback_transpose(exec, input, output); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); - template void conj_transpose(std::shared_ptr exec, @@ -274,9 +266,6 @@ void conj_transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); - } // namespace fbcsr } // namespace hip diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 50f46cd23cd..d552cc612bf 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -1,4 +1,6 @@ add_library(ginkgo_omp $ "") +# we don't split up the dense kernels into distinct compliations +list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) target_sources(ginkgo_omp PRIVATE base/device_matrix_data_kernels.cpp @@ -39,7 +41,6 @@ target_sources(ginkgo_omp stop/criterion_kernels.cpp stop/residual_norm_kernels.cpp ${GKO_UNIFIED_COMMON_SOURCES} - ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp ) ginkgo_compile_features(ginkgo_omp) From d83d35cee09f7074526afb4c522d4526a05998f4 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 1 Aug 2023 16:02:54 +0200 Subject: [PATCH 093/583] improve formatting --- CMakeLists.txt | 1 - common/unified/matrix/dense_kernels.instantiate.cpp | 4 ++-- .../{dense_kernels.tpp => dense_kernels.template.cpp} | 0 dev_tools/scripts/config | 6 ++++++ 4 files changed, 8 insertions(+), 3 deletions(-) rename common/unified/matrix/{dense_kernels.tpp => dense_kernels.template.cpp} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e625892c3d..6351ce98bfa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt 
@@ -304,7 +304,6 @@ configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in # propagated to the other parts of Ginkgo in case of building as static libraries add_subdirectory(devices) # Basic device functionalities. Always compiled. add_subdirectory(common) # Import list of unified kernel source files -message("${GKO_UNIFIED_COMMON_INSTANTIATE_SOURCES}") if(GINKGO_BUILD_CUDA) add_subdirectory(cuda) # High-performance kernels for NVIDIA GPUs endif() diff --git a/common/unified/matrix/dense_kernels.instantiate.cpp b/common/unified/matrix/dense_kernels.instantiate.cpp index 92d9fa26a00..bf20c8a19b6 100644 --- a/common/unified/matrix/dense_kernels.instantiate.cpp +++ b/common/unified/matrix/dense_kernels.instantiate.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "common/unified/matrix/dense_kernels.tpp" +#include "common/unified/matrix/dense_kernels.template.cpp" namespace gko { @@ -105,4 +105,4 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( } // namespace dense } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels -} // namespace gko \ No newline at end of file +} // namespace gko diff --git a/common/unified/matrix/dense_kernels.tpp b/common/unified/matrix/dense_kernels.template.cpp similarity index 100% rename from common/unified/matrix/dense_kernels.tpp rename to common/unified/matrix/dense_kernels.template.cpp diff --git a/dev_tools/scripts/config b/dev_tools/scripts/config index 03b160e3656..937af4a31d1 100644 --- a/dev_tools/scripts/config +++ b/dev_tools/scripts/config @@ -32,6 +32,12 @@ - FixInclude: "common/unified/base/kernel_launch_solver.hpp" - "(cuda|hip|dpcpp|omp)/base/kernel_launch_solver\." - FixInclude: "common/unified/base/kernel_launch_solver.hpp" +- "dense_kernels.template.cpp" + - FixInclude: "core/matrix/dense_kernels.hpp" +- "/csr_kernels.template.*" + - FixInclude: "core/matrix/csr_kernels.hpp" +- "/fbcsr_kernels.template.*" + - FixInclude: "core/matrix/fbcsr_kernels.hpp" - "test/base/kernel_launch_generic.cpp" - FixInclude: "common/unified/base/kernel_launch.hpp" - "^test/solver/(lower|upper)_trs_kernels.cpp" From bb065af2e0c7b0073d090c8ddd4ff91589f4bdd3 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 1 Aug 2023 16:03:45 +0200 Subject: [PATCH 094/583] fix typos --- cuda/CMakeLists.txt | 2 +- hip/CMakeLists.txt | 2 +- omp/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 6cfb83a59e8..37d56e5855f 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -2,7 +2,7 @@ add_library(ginkgo_cuda $ "") include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) add_instantiation_files(. matrix/csr_kernels.instantiate.cu CSR_INSTANTIATE) add_instantiation_files(. matrix/fbcsr_kernels.instantiate.cu FBCSR_INSTANTIATE) -# we don't split up the dense kernels into distinct compliations +# we don't split up the dense kernels into distinct compilations list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) target_sources(ginkgo_cuda PRIVATE diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 7e0558844cf..e433322e644 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,7 +1,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) add_instantiation_files(. 
matrix/csr_kernels.instantiate.hip.cpp CSR_INSTANTIATE) add_instantiation_files(. matrix/fbcsr_kernels.instantiate.hip.cpp FBCSR_INSTANTIATE) -# we don't split up the dense kernels into distinct compliations +# we don't split up the dense kernels into distinct compilations list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) set(GINKGO_HIP_SOURCES base/device.hip.cpp diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index d552cc612bf..bda26ad63d3 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -1,5 +1,5 @@ add_library(ginkgo_omp $ "") -# we don't split up the dense kernels into distinct compliations +# we don't split up the dense kernels into distinct compilations list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) target_sources(ginkgo_omp PRIVATE From 62d8f2abf6aaea7092d63d05f5cafd4baa5ab57c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 2 Aug 2023 10:30:49 +0200 Subject: [PATCH 095/583] review updates - remove unused variables - warn on incorrect instantiation file format - allow disabling the template split - simpler format_header config entries Co-authored-by: Yuhsiang M. Tsai Co-authored-by: Marcel Koch --- CMakeLists.txt | 1 + cmake/template_instantiation.cmake | 15 ++++++++++++++- dev_tools/scripts/config | 8 ++------ 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6351ce98bfa..32552a77d6a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,6 +81,7 @@ set(GINKGO_HIP_CLANG_COMPILER_FLAGS "" CACHE STRING "Set the required HIP CLANG compiler flags. Current default is an empty string.") set(GINKGO_HIP_AMDGPU "" CACHE STRING "The amdgpu_target(s) variable passed to hipcc. The default is none (auto).") +option(GINKGO_SPLIT_TEMPLATE_INSTANTIATIONS "Split template instantiations for slow-to-compile files. 
This improves parallel build performance" ON) option(GINKGO_JACOBI_FULL_OPTIMIZATIONS "Use all the optimizations for the CUDA Jacobi algorithm" OFF) option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON) if(MSVC OR WIN32 OR CYGWIN OR APPLE) diff --git a/cmake/template_instantiation.cmake b/cmake/template_instantiation.cmake index bc37d895537..f77527e0092 100644 --- a/cmake/template_instantiation.cmake +++ b/cmake/template_instantiation.cmake @@ -1,4 +1,9 @@ function(add_instantiation_files source_dir source_file output_files_var) + # if instantiation is disabled, compile the file directly + if(NOT GINKGO_SPLIT_TEMPLATE_INSTANTIATIONS) + set(${output_files_var} "${source_dir}/${source_file}" PARENT_SCOPE) + return() + endif() # read full file into variable set(source_path "${source_dir}/${source_file}") file(READ "${source_path}" file_contents) @@ -13,10 +18,19 @@ function(add_instantiation_files source_dir source_file output_files_var) set(counter 0) foreach(line IN LISTS file_contents) if(line MATCHES "// begin") + if(begin_location) + message(FATAL_ERROR "Duplicate begin in line ${counter}, first found in ${begin_location}") + endif() set(begin_location ${counter}) elseif(line MATCHES "// split") + if((NOT begin_location) OR end_location) + message(FATAL_ERROR "Found split outside begin/end in line ${counter}") + endif() list(APPEND split_locations ${counter}) elseif(line MATCHES "// end") + if(end_location) + message(FATAL_ERROR "Duplicate end in line ${counter}, first found in ${end_location}") + endif() set(end_location ${counter}) endif() math(EXPR counter "${counter} + 1") @@ -30,7 +44,6 @@ function(add_instantiation_files source_dir source_file output_files_var) # determine which lines belong to the header and footer set(range_begins ${begin_location} ${split_locations}) set(range_ends ${split_locations} ${end_location}) - list(LENGTH begin_locations range_count) list(LENGTH split_locations range_count_minus_one) math(EXPR length_header "${begin_location}") math(EXPR end_location_past "${end_location} + 1") diff --git a/dev_tools/scripts/config b/dev_tools/scripts/config index 937af4a31d1..79e6a227530 100644 --- a/dev_tools/scripts/config +++ b/dev_tools/scripts/config @@ -32,12 +32,6 @@ - FixInclude: "common/unified/base/kernel_launch_solver.hpp" - "(cuda|hip|dpcpp|omp)/base/kernel_launch_solver\." 
- FixInclude: "common/unified/base/kernel_launch_solver.hpp" -- "dense_kernels.template.cpp" - - FixInclude: "core/matrix/dense_kernels.hpp" -- "/csr_kernels.template.*" - - FixInclude: "core/matrix/csr_kernels.hpp" -- "/fbcsr_kernels.template.*" - - FixInclude: "core/matrix/fbcsr_kernels.hpp" - "test/base/kernel_launch_generic.cpp" - FixInclude: "common/unified/base/kernel_launch.hpp" - "^test/solver/(lower|upper)_trs_kernels.cpp" @@ -57,6 +51,7 @@ - "common/unified/.*.cpp" - PathIgnore: "2" - PathPrefix: "core" + - CoreSuffix: "\.template" - "core/test/base/(extended_float|iterator_factory)" - RemoveTest: "true" - "core/test/base/allocator" @@ -102,3 +97,4 @@ - ".*" - PathPrefix: "core" - PathIgnore: "1" + - CoreSuffix: "\.template" From 4bebc266ef076af3f27035d813d38f6622bc644d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 2 Aug 2023 11:16:45 +0200 Subject: [PATCH 096/583] mark switch as advanced --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 32552a77d6a..4d70ac404ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,7 @@ set(GINKGO_HIP_CLANG_COMPILER_FLAGS "" CACHE STRING set(GINKGO_HIP_AMDGPU "" CACHE STRING "The amdgpu_target(s) variable passed to hipcc. The default is none (auto).") option(GINKGO_SPLIT_TEMPLATE_INSTANTIATIONS "Split template instantiations for slow-to-compile files. This improves parallel build performance" ON) +mark_as_advanced(GINKGO_SPLIT_TEMPLATE_INSTANTIATIONS) option(GINKGO_JACOBI_FULL_OPTIMIZATIONS "Use all the optimizations for the CUDA Jacobi algorithm" OFF) option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON) if(MSVC OR WIN32 OR CYGWIN OR APPLE) From 6a3ac15aa1850a856af2751afe47e39e147a7d39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Gr=C3=BCtzmacher?= Date: Tue, 11 Jul 2023 19:22:36 +0200 Subject: [PATCH 097/583] Fix OMP row reduction kernel The kernel requested more memory than necessary in most scenarios because of a faulty temporary storage estimation. --- omp/base/kernel_launch_reduction.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/omp/base/kernel_launch_reduction.hpp b/omp/base/kernel_launch_reduction.hpp index d8d081e323b..a46ce970421 100644 --- a/omp/base/kernel_launch_reduction.hpp +++ b/omp/base/kernel_launch_reduction.hpp @@ -327,7 +327,7 @@ void run_kernel_col_reduction_sized_impl( const auto reduction_size = ceildiv(reduction_kernel_oversubscription * num_threads, cols); const auto rows_per_thread = ceildiv(rows, reduction_size); - const auto required_storage = sizeof(ValueType) * rows * reduction_size; + const auto required_storage = sizeof(ValueType) * cols * reduction_size; if (tmp.get_num_elems() < required_storage) { tmp.resize_and_reset(required_storage); } From 2c9332182305cbf29fe44035df9e039c3b6406b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Gr=C3=BCtzmacher?= Date: Thu, 13 Jul 2023 16:01:44 +0200 Subject: [PATCH 098/583] Add specific tests for OMP reductions --- test/base/kernel_launch_generic.cpp | 168 ++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/test/base/kernel_launch_generic.cpp b/test/base/kernel_launch_generic.cpp index 3dd1570c5f8..cf07f867c82 100644 --- a/test/base/kernel_launch_generic.cpp +++ b/test/base/kernel_launch_generic.cpp @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common/unified/base/kernel_launch.hpp" +#include #include #include @@ -364,6 +365,39 @@ void run1d_reduction(std::shared_ptr exec) TEST_F(KernelLaunch, Reduction1D) { run1d_reduction(exec); } +void run1d_reduction_cached(std::shared_ptr exec, + std::vector sizes) +{ + gko::array output{exec, 1}; + gko::array temp(exec); + for (const auto& size : sizes) { + temp.clear(); + gko::kernels::EXEC_NAMESPACE::run_kernel_reduction_cached( + exec, + [] GKO_KERNEL(auto i) { + static_assert(is_same::value, "index"); + return i + 1; + }, + [] GKO_KERNEL(auto i, auto j) { return std::max(i, j); }, + [] GKO_KERNEL(auto j) { return j; }, int64{}, output.get_data(), + size, temp); + + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), + static_cast(size)); + // The temporary storage (used for partial sums) must be smaller than + // the input array + ASSERT_LT(temp.get_num_elems() / sizeof(int64), size); + } +} + +TEST_F(KernelLaunch, Reduction1DCached) +{ + // Note: Start with at least 200 elements in case the machine has a lot of + // cores + run1d_reduction_cached(exec, {1000, 1000000, 1234567, 7654321}); +} + + void run2d_reduction(std::shared_ptr exec) { gko::array output{exec, {-1l}}; @@ -432,6 +466,47 @@ void run2d_reduction(std::shared_ptr exec) TEST_F(KernelLaunch, Reduction2D) { run2d_reduction(exec); } +void run2d_reduction_cached(std::shared_ptr exec, + std::vector> dims) +{ + gko::array output{exec, 1}; + gko::array temp(exec); + for (const auto& dim : dims) { + temp.clear(); + gko::kernels::EXEC_NAMESPACE::run_kernel_reduction_cached( + exec, + [] GKO_KERNEL(auto i, auto j) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "index"); + return i + j + 2; + }, + [] GKO_KERNEL(auto i, auto j) { return std::max(i, j); }, + [] GKO_KERNEL(auto j) { return j; }, int64{}, output.get_data(), + dim, temp); + + ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), + static_cast(dim[0] + dim[1])); + // The temporary storage (used for partial sums) must be smaller than + // the input array + ASSERT_LT(temp.get_num_elems() / sizeof(int64), dim[0] * dim[1]); + } +} + +TEST_F(KernelLaunch, Reduction2DCached) +{ + // Note: Start with at least 200 elements in case the machine has a lot of + // cores + run2d_reduction_cached(exec, {{20, 10}, + {10, 3000}, + {1000, 5}, + {30, 50}, + {1, 100000}, + {100000, 1}, + {500000, 20}, + {20, 500000}}); +} + + void run2d_row_reduction(std::shared_ptr exec) { for (auto num_rows : {0, 100, 1000, 10000}) { @@ -481,6 +556,53 @@ void run2d_row_reduction(std::shared_ptr exec) TEST_F(KernelLaunch, ReductionRow2D) { run2d_row_reduction(exec); } +void run2d_row_reduction_cached(std::shared_ptr exec, + std::vector> dims) +{ + // This assumes at most 256 OpenMP Threads + constexpr int64_t max_tmp_elems = 4 * 256; + const size_type result_stride = 1; + gko::array temp(exec); + for (const auto& dim : dims) { + gko::array host_ref{exec->get_master(), dim[0]}; + gko::array output{exec, host_ref}; + temp.clear(); + for (int64 i = 0; i < host_ref.get_num_elems(); ++i) { + host_ref.get_data()[i] = dim[1] + i + 1; + } + + gko::kernels::EXEC_NAMESPACE::run_kernel_row_reduction_cached( + exec, + [] GKO_KERNEL(auto i, auto j) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "index"); + return i + j + 2; + }, + [] GKO_KERNEL(auto i, auto j) { return std::max(i, j); }, + [] GKO_KERNEL(auto j) { return j; }, int64{}, output.get_data(), + result_stride, dim, temp); + + GKO_ASSERT_ARRAY_EQ(host_ref, output); + 
ASSERT_LT(temp.get_num_elems() / sizeof(int64), + max_tmp_elems * max_tmp_elems); + } +} + +TEST_F(KernelLaunch, ReductionRowCached) +{ + // Note: Start with at least 200 elements in case the machine has a lot of + // cores + run2d_row_reduction_cached(exec, {{20, 10}, + {10, 3000}, + {1000, 5}, + {30, 50}, + {1, 100000}, + {100000, 1}, + {500000, 20}, + {20, 500000}}); +} + + void run2d_col_reduction(std::shared_ptr exec) { // empty, most threads idle, most threads busy, multiple blocks @@ -530,3 +652,49 @@ void run2d_col_reduction(std::shared_ptr exec) } TEST_F(KernelLaunch, ReductionCol2D) { run2d_col_reduction(exec); } + + +void run2d_col_reduction_cached(std::shared_ptr exec, + std::vector> dims) +{ + gko::array temp(exec); + for (const auto& dim : dims) { + gko::array host_ref{exec->get_master(), dim[1]}; + gko::array output{exec, host_ref}; + temp.clear(); + for (int64 i = 0; i < host_ref.get_num_elems(); ++i) { + host_ref.get_data()[i] = dim[0] + i + 1; + } + + gko::kernels::EXEC_NAMESPACE::run_kernel_col_reduction_cached( + exec, + [] GKO_KERNEL(auto i, auto j) { + static_assert(is_same::value, "index"); + static_assert(is_same::value, "index"); + return i + j + 2; + }, + [] GKO_KERNEL(auto i, auto j) { return std::max(i, j); }, + [] GKO_KERNEL(auto j) { return j; }, int64{}, output.get_data(), + dim, temp); + + GKO_ASSERT_ARRAY_EQ(host_ref, output); + // This assumes at most 256 OpenMP Threads + const size_type temp_elem_limit = + std::max(size_type{4 * 256}, dim[0] * dim[1]); + ASSERT_LT(temp.get_num_elems() / sizeof(int64), temp_elem_limit); + } +} + +TEST_F(KernelLaunch, ReductionColCached) +{ + // Note: Start with at least 200 elements in case the machine has a lot of + // cores + run2d_col_reduction_cached(exec, {{20, 10}, + {10, 3000}, + {1000, 5}, + {30, 50}, + {1, 100000}, + {100000, 1}, + {500000, 20}, + {20, 500000}}); +} From 585eea1273684024d0c852000bf5536326c3e43f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Gr=C3=BCtzmacher?= Date: Fri, 21 Jul 2023 15:26:07 +0200 Subject: [PATCH 099/583] Update reduction tests to all scale with size --- test/base/kernel_launch_generic.cpp | 68 +++++++++++++---------------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/test/base/kernel_launch_generic.cpp b/test/base/kernel_launch_generic.cpp index cf07f867c82..57bab96d9c0 100644 --- a/test/base/kernel_launch_generic.cpp +++ b/test/base/kernel_launch_generic.cpp @@ -373,11 +373,7 @@ void run1d_reduction_cached(std::shared_ptr exec, for (const auto& size : sizes) { temp.clear(); gko::kernels::EXEC_NAMESPACE::run_kernel_reduction_cached( - exec, - [] GKO_KERNEL(auto i) { - static_assert(is_same::value, "index"); - return i + 1; - }, + exec, [] GKO_KERNEL(auto i) { return i + 1; }, [] GKO_KERNEL(auto i, auto j) { return std::max(i, j); }, [] GKO_KERNEL(auto j) { return j; }, int64{}, output.get_data(), size, temp); @@ -469,17 +465,13 @@ TEST_F(KernelLaunch, Reduction2D) { run2d_reduction(exec); } void run2d_reduction_cached(std::shared_ptr exec, std::vector> dims) { + constexpr size_type min_allowed_tmp_elems = 4 * 256; gko::array output{exec, 1}; gko::array temp(exec); for (const auto& dim : dims) { temp.clear(); gko::kernels::EXEC_NAMESPACE::run_kernel_reduction_cached( - exec, - [] GKO_KERNEL(auto i, auto j) { - static_assert(is_same::value, "index"); - static_assert(is_same::value, "index"); - return i + j + 2; - }, + exec, [] GKO_KERNEL(auto i, auto j) { return i + j + 2; }, [] GKO_KERNEL(auto i, auto j) { return std::max(i, j); }, [] GKO_KERNEL(auto j) { 
return j; }, int64{}, output.get_data(), dim, temp); @@ -487,19 +479,23 @@ void run2d_reduction_cached(std::shared_ptr exec, ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), static_cast(dim[0] + dim[1])); // The temporary storage (used for partial sums) must be smaller than - // the input array - ASSERT_LT(temp.get_num_elems() / sizeof(int64), dim[0] * dim[1]); + // the input array (or smaller than a set minimum) + const size_type max_tmp_elems = + std::max(dim[0] * dim[1], min_allowed_tmp_elems); + ASSERT_LT(temp.get_num_elems() / sizeof(int64), max_tmp_elems); } } TEST_F(KernelLaunch, Reduction2DCached) { - // Note: Start with at least 200 elements in case the machine has a lot of - // cores run2d_reduction_cached(exec, {{20, 10}, {10, 3000}, {1000, 5}, {30, 50}, + {600, 500}, + {500, 600}, + {1000, 900}, + {900, 1000}, {1, 100000}, {100000, 1}, {500000, 20}, @@ -559,8 +555,9 @@ TEST_F(KernelLaunch, ReductionRow2D) { run2d_row_reduction(exec); } void run2d_row_reduction_cached(std::shared_ptr exec, std::vector> dims) { - // This assumes at most 256 OpenMP Threads - constexpr int64_t max_tmp_elems = 4 * 256; + // The 2D row reduction potentially needs a lot of memory for small input + // sizes + constexpr size_type min_allowed_tmp_elems = 4 * 256 * 4 * 256; const size_type result_stride = 1; gko::array temp(exec); for (const auto& dim : dims) { @@ -572,30 +569,30 @@ void run2d_row_reduction_cached(std::shared_ptr exec, } gko::kernels::EXEC_NAMESPACE::run_kernel_row_reduction_cached( - exec, - [] GKO_KERNEL(auto i, auto j) { - static_assert(is_same::value, "index"); - static_assert(is_same::value, "index"); - return i + j + 2; - }, + exec, [] GKO_KERNEL(auto i, auto j) { return i + j + 2; }, [] GKO_KERNEL(auto i, auto j) { return std::max(i, j); }, [] GKO_KERNEL(auto j) { return j; }, int64{}, output.get_data(), result_stride, dim, temp); GKO_ASSERT_ARRAY_EQ(host_ref, output); - ASSERT_LT(temp.get_num_elems() / sizeof(int64), - max_tmp_elems * max_tmp_elems); + // The temporary storage (used for partial sums) must be smaller than + // the input array (or smaller than a set minimum) + const size_type max_tmp_elems = + std::max(dim[0] * dim[1], min_allowed_tmp_elems); + ASSERT_LT(temp.get_num_elems() / sizeof(int64), max_tmp_elems); } } TEST_F(KernelLaunch, ReductionRowCached) { - // Note: Start with at least 200 elements in case the machine has a lot of - // cores run2d_row_reduction_cached(exec, {{20, 10}, {10, 3000}, {1000, 5}, {30, 50}, + {600, 500}, + {500, 600}, + {1000, 900}, + {900, 1000}, {1, 100000}, {100000, 1}, {500000, 20}, @@ -657,6 +654,7 @@ TEST_F(KernelLaunch, ReductionCol2D) { run2d_col_reduction(exec); } void run2d_col_reduction_cached(std::shared_ptr exec, std::vector> dims) { + constexpr size_type min_allowed_tmp_elems = 4 * 256; gko::array temp(exec); for (const auto& dim : dims) { gko::array host_ref{exec->get_master(), dim[1]}; @@ -667,32 +665,28 @@ void run2d_col_reduction_cached(std::shared_ptr exec, } gko::kernels::EXEC_NAMESPACE::run_kernel_col_reduction_cached( - exec, - [] GKO_KERNEL(auto i, auto j) { - static_assert(is_same::value, "index"); - static_assert(is_same::value, "index"); - return i + j + 2; - }, + exec, [] GKO_KERNEL(auto i, auto j) { return i + j + 2; }, [] GKO_KERNEL(auto i, auto j) { return std::max(i, j); }, [] GKO_KERNEL(auto j) { return j; }, int64{}, output.get_data(), dim, temp); GKO_ASSERT_ARRAY_EQ(host_ref, output); - // This assumes at most 256 OpenMP Threads const size_type temp_elem_limit = - std::max(size_type{4 * 256}, 
dim[0] * dim[1]); + std::max(min_allowed_tmp_elems, dim[0] * dim[1]); ASSERT_LT(temp.get_num_elems() / sizeof(int64), temp_elem_limit); } } TEST_F(KernelLaunch, ReductionColCached) { - // Note: Start with at least 200 elements in case the machine has a lot of - // cores run2d_col_reduction_cached(exec, {{20, 10}, {10, 3000}, {1000, 5}, {30, 50}, + {600, 500}, + {500, 600}, + {1000, 900}, + {900, 1000}, {1, 100000}, {100000, 1}, {500000, 20}, From b7c8c15f0ee49814d4b1a4a07b03e5d1a9118f66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Gr=C3=BCtzmacher?= Date: Tue, 25 Jul 2023 18:02:46 +0200 Subject: [PATCH 100/583] Change OMP reduction implementation At most allocate as much as the input vector for OMP reductions. --- omp/base/kernel_launch_reduction.hpp | 45 +++++++++++++++++----------- test/base/kernel_launch_generic.cpp | 27 +++++------------ 2 files changed, 34 insertions(+), 38 deletions(-) diff --git a/omp/base/kernel_launch_reduction.hpp b/omp/base/kernel_launch_reduction.hpp index a46ce970421..5dfbd5ba6c0 100644 --- a/omp/base/kernel_launch_reduction.hpp +++ b/omp/base/kernel_launch_reduction.hpp @@ -62,8 +62,9 @@ void run_kernel_reduction_impl(std::shared_ptr exec, ValueType* result, size_type size, array& tmp, MappedKernelArgs... args) { - const auto num_threads = static_cast(omp_get_max_threads()); const auto ssize = static_cast(size); + // Limit the number of threads to the number of columns + const auto num_threads = std::min(omp_get_max_threads(), ssize); const auto work_per_thread = ceildiv(ssize, num_threads); const auto required_storage = sizeof(ValueType) * num_threads; if (tmp.get_num_elems() < required_storage) { @@ -82,8 +83,8 @@ void run_kernel_reduction_impl(std::shared_ptr exec, } partial[thread_id] = local_partial; } - *result = - finalize(std::accumulate(partial, partial + num_threads, identity, op)); + *result = finalize(std::accumulate( + partial, partial + required_storage / sizeof(ValueType), identity, op)); } @@ -99,7 +100,8 @@ void run_kernel_reduction_sized_impl(syn::value_list, { const auto rows = static_cast(size[0]); const auto cols = static_cast(size[1]); - const auto num_threads = static_cast(omp_get_max_threads()); + // Limit the number of threads to the number of columns + const auto num_threads = std::min(omp_get_max_threads(), rows); const auto work_per_thread = ceildiv(rows, num_threads); const auto required_storage = sizeof(ValueType) * num_threads; if (tmp.get_num_elems() < required_storage) { @@ -109,7 +111,7 @@ void run_kernel_reduction_sized_impl(syn::value_list, static_assert(remainder_cols < block_size, "remainder too large"); const auto rounded_cols = cols / block_size * block_size; GKO_ASSERT(rounded_cols + remainder_cols == cols); -#pragma omp parallel +#pragma omp parallel num_threads(num_threads) { const auto thread_id = omp_get_thread_num(); const auto begin = thread_id * work_per_thread; @@ -147,8 +149,8 @@ void run_kernel_reduction_sized_impl(syn::value_list, } partial[thread_id] = local_partial; } - *result = - finalize(std::accumulate(partial, partial + num_threads, identity, op)); + *result = finalize(std::accumulate( + partial, partial + required_storage / sizeof(ValueType), identity, op)); } GKO_ENABLE_IMPLEMENTATION_SELECTION(select_run_kernel_reduction_sized, @@ -210,12 +212,12 @@ void run_kernel_row_reduction_impl(std::shared_ptr exec, constexpr int block_size = 8; const auto rows = static_cast(size[0]); const auto cols = static_cast(size[1]); - const auto num_threads = static_cast(omp_get_max_threads()); + const auto 
available_threads = static_cast(omp_get_max_threads()); if (rows <= 0) { return; } // enough work to keep all threads busy or only very small reduction sizes - if (rows >= reduction_kernel_oversubscription * num_threads || + if (rows >= reduction_kernel_oversubscription * available_threads || cols < rows) { #pragma omp parallel for for (int64 row = 0; row < rows; row++) { @@ -229,8 +231,11 @@ void run_kernel_row_reduction_impl(std::shared_ptr exec, } } else { // small number of rows and large reduction sizes: do partial sum first + const auto num_threads = std::min(available_threads, cols); const auto work_per_thread = ceildiv(cols, num_threads); - const auto required_storage = sizeof(ValueType) * rows * num_threads; + const auto temp_elems_per_row = num_threads; + const auto required_storage = + sizeof(ValueType) * rows * temp_elems_per_row; if (tmp.get_num_elems() < required_storage) { tmp.resize_and_reset(required_storage); } @@ -247,7 +252,7 @@ void run_kernel_row_reduction_impl(std::shared_ptr exec, return fn(row, col, args...); }()); } - partial[row * num_threads + thread_id] = local_partial; + partial[row * temp_elems_per_row + thread_id] = local_partial; } } // then accumulate the partial sums and write to result @@ -255,10 +260,11 @@ void run_kernel_row_reduction_impl(std::shared_ptr exec, for (int64 row = 0; row < rows; row++) { [&] { auto local_partial = identity; - for (int64 thread_id = 0; thread_id < num_threads; + for (int64 thread_id = 0; thread_id < temp_elems_per_row; thread_id++) { - local_partial = op(local_partial, - partial[row * num_threads + thread_id]); + local_partial = + op(local_partial, + partial[row * temp_elems_per_row + thread_id]); } result[row * result_stride] = finalize(local_partial); }(); @@ -302,12 +308,12 @@ void run_kernel_col_reduction_sized_impl( { const auto rows = static_cast(size[0]); const auto cols = static_cast(size[1]); - const auto num_threads = static_cast(omp_get_max_threads()); + const auto available_threads = static_cast(omp_get_max_threads()); static_assert(remainder_cols < block_size, "remainder too large"); GKO_ASSERT(cols % block_size == remainder_cols); const auto num_col_blocks = ceildiv(cols, block_size); // enough work to keep all threads busy or only very small reduction sizes - if (cols >= reduction_kernel_oversubscription * num_threads || + if (cols >= reduction_kernel_oversubscription * available_threads || rows < cols) { #pragma omp parallel for for (int64 col_block = 0; col_block < num_col_blocks; col_block++) { @@ -324,8 +330,11 @@ void run_kernel_col_reduction_sized_impl( } } else { // number of blocks that need to be reduced afterwards - const auto reduction_size = - ceildiv(reduction_kernel_oversubscription * num_threads, cols); + // This reduction_size definition ensures we don't use more temporary + // storage than the input vector + const auto reduction_size = std::min( + rows, ceildiv(reduction_kernel_oversubscription * available_threads, + cols)); const auto rows_per_thread = ceildiv(rows, reduction_size); const auto required_storage = sizeof(ValueType) * cols * reduction_size; if (tmp.get_num_elems() < required_storage) { diff --git a/test/base/kernel_launch_generic.cpp b/test/base/kernel_launch_generic.cpp index 57bab96d9c0..bc4119d2806 100644 --- a/test/base/kernel_launch_generic.cpp +++ b/test/base/kernel_launch_generic.cpp @@ -382,15 +382,13 @@ void run1d_reduction_cached(std::shared_ptr exec, static_cast(size)); // The temporary storage (used for partial sums) must be smaller than // the input array - 
ASSERT_LT(temp.get_num_elems() / sizeof(int64), size); + ASSERT_LE(temp.get_num_elems() / sizeof(int64), size); } } TEST_F(KernelLaunch, Reduction1DCached) { - // Note: Start with at least 200 elements in case the machine has a lot of - // cores - run1d_reduction_cached(exec, {1000, 1000000, 1234567, 7654321}); + run1d_reduction_cached(exec, {10, 1000, 1000000, 1234567, 7654321}); } @@ -465,7 +463,6 @@ TEST_F(KernelLaunch, Reduction2D) { run2d_reduction(exec); } void run2d_reduction_cached(std::shared_ptr exec, std::vector> dims) { - constexpr size_type min_allowed_tmp_elems = 4 * 256; gko::array output{exec, 1}; gko::array temp(exec); for (const auto& dim : dims) { @@ -479,10 +476,8 @@ void run2d_reduction_cached(std::shared_ptr exec, ASSERT_EQ(exec->copy_val_to_host(output.get_const_data()), static_cast(dim[0] + dim[1])); // The temporary storage (used for partial sums) must be smaller than - // the input array (or smaller than a set minimum) - const size_type max_tmp_elems = - std::max(dim[0] * dim[1], min_allowed_tmp_elems); - ASSERT_LT(temp.get_num_elems() / sizeof(int64), max_tmp_elems); + // the input array + ASSERT_LE(temp.get_num_elems() / sizeof(int64), dim[0] * dim[1]); } } @@ -555,9 +550,6 @@ TEST_F(KernelLaunch, ReductionRow2D) { run2d_row_reduction(exec); } void run2d_row_reduction_cached(std::shared_ptr exec, std::vector> dims) { - // The 2D row reduction potentially needs a lot of memory for small input - // sizes - constexpr size_type min_allowed_tmp_elems = 4 * 256 * 4 * 256; const size_type result_stride = 1; gko::array temp(exec); for (const auto& dim : dims) { @@ -576,10 +568,8 @@ void run2d_row_reduction_cached(std::shared_ptr exec, GKO_ASSERT_ARRAY_EQ(host_ref, output); // The temporary storage (used for partial sums) must be smaller than - // the input array (or smaller than a set minimum) - const size_type max_tmp_elems = - std::max(dim[0] * dim[1], min_allowed_tmp_elems); - ASSERT_LT(temp.get_num_elems() / sizeof(int64), max_tmp_elems); + // the input array + ASSERT_LE(temp.get_num_elems() / sizeof(int64), dim[0] * dim[1]); } } @@ -654,7 +644,6 @@ TEST_F(KernelLaunch, ReductionCol2D) { run2d_col_reduction(exec); } void run2d_col_reduction_cached(std::shared_ptr exec, std::vector> dims) { - constexpr size_type min_allowed_tmp_elems = 4 * 256; gko::array temp(exec); for (const auto& dim : dims) { gko::array host_ref{exec->get_master(), dim[1]}; @@ -671,9 +660,7 @@ void run2d_col_reduction_cached(std::shared_ptr exec, dim, temp); GKO_ASSERT_ARRAY_EQ(host_ref, output); - const size_type temp_elem_limit = - std::max(min_allowed_tmp_elems, dim[0] * dim[1]); - ASSERT_LT(temp.get_num_elems() / sizeof(int64), temp_elem_limit); + ASSERT_LE(temp.get_num_elems() / sizeof(int64), dim[0] * dim[1]); } } From bc0adb04a9e0a44226c95a6c27764fb8b71dbd7b Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 2 Aug 2023 14:14:33 +0200 Subject: [PATCH 101/583] fix warning --- include/ginkgo/core/base/range.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ginkgo/core/base/range.hpp b/include/ginkgo/core/base/range.hpp index 5ba07aa834f..1e4c7a5d00e 100644 --- a/include/ginkgo/core/base/range.hpp +++ b/include/ginkgo/core/base/range.hpp @@ -864,7 +864,7 @@ GKO_BIND_UNARY_RANGE_OPERATION_TO_OPERATOR(transpose_operation, transpose); #define GKO_DEPRECATED_SIMPLE_BINARY_OPERATION(_deprecated_name, _name) \ - struct [[deprecated("Please use " #_name)]] _deprecated_name : _name{}; + struct [[deprecated("Please use " #_name)]] _deprecated_name : _name {} #define 
GKO_DEFINE_SIMPLE_BINARY_OPERATION(_name, ...) \ struct _name { \ From 46c8bbd9539bc58b4f65dc2271ab956b66ee62e8 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 2 Aug 2023 14:15:03 +0200 Subject: [PATCH 102/583] fix divisions by zero and num_threads == 0 --- omp/base/kernel_launch_reduction.hpp | 112 +++++++++++++++------------ 1 file changed, 62 insertions(+), 50 deletions(-) diff --git a/omp/base/kernel_launch_reduction.hpp b/omp/base/kernel_launch_reduction.hpp index 5dfbd5ba6c0..ef57803ad31 100644 --- a/omp/base/kernel_launch_reduction.hpp +++ b/omp/base/kernel_launch_reduction.hpp @@ -65,7 +65,8 @@ void run_kernel_reduction_impl(std::shared_ptr exec, const auto ssize = static_cast(size); // Limit the number of threads to the number of columns const auto num_threads = std::min(omp_get_max_threads(), ssize); - const auto work_per_thread = ceildiv(ssize, num_threads); + const auto work_per_thread = + ceildiv(ssize, std::max(num_threads, 1)); const auto required_storage = sizeof(ValueType) * num_threads; if (tmp.get_num_elems() < required_storage) { tmp.resize_and_reset(required_storage); @@ -74,17 +75,20 @@ void run_kernel_reduction_impl(std::shared_ptr exec, #pragma omp parallel num_threads(num_threads) { const auto thread_id = omp_get_thread_num(); - const auto begin = thread_id * work_per_thread; - const auto end = std::min(ssize, begin + work_per_thread); + if (thread_id < num_threads) { + const auto begin = thread_id * work_per_thread; + const auto end = std::min(ssize, begin + work_per_thread); - auto local_partial = identity; - for (auto i = begin; i < end; i++) { - local_partial = op(local_partial, fn(i, map_to_device(args)...)); + auto local_partial = identity; + for (auto i = begin; i < end; i++) { + local_partial = + op(local_partial, fn(i, map_to_device(args)...)); + } + partial[thread_id] = local_partial; } - partial[thread_id] = local_partial; } - *result = finalize(std::accumulate( - partial, partial + required_storage / sizeof(ValueType), identity, op)); + *result = + finalize(std::accumulate(partial, partial + num_threads, identity, op)); } @@ -102,7 +106,7 @@ void run_kernel_reduction_sized_impl(syn::value_list, const auto cols = static_cast(size[1]); // Limit the number of threads to the number of columns const auto num_threads = std::min(omp_get_max_threads(), rows); - const auto work_per_thread = ceildiv(rows, num_threads); + const auto work_per_thread = ceildiv(rows, std::max(num_threads, 1)); const auto required_storage = sizeof(ValueType) * num_threads; if (tmp.get_num_elems() < required_storage) { tmp.resize_and_reset(required_storage); @@ -114,43 +118,46 @@ void run_kernel_reduction_sized_impl(syn::value_list, #pragma omp parallel num_threads(num_threads) { const auto thread_id = omp_get_thread_num(); - const auto begin = thread_id * work_per_thread; - const auto end = std::min(rows, begin + work_per_thread); - - auto local_partial = identity; - if (rounded_cols == 0 || cols == block_size) { - // we group all sizes <= block_size here and unroll explicitly - constexpr auto local_cols = - remainder_cols == 0 ? 
block_size : remainder_cols; - for (auto row = begin; row < end; row++) { -#pragma unroll - for (int64 col = 0; col < local_cols; col++) { - local_partial = op(local_partial, fn(row, col, args...)); - } - } - } else { - // we operate in block_size blocks plus an explicitly unrolled - // remainder - for (auto row = begin; row < end; row++) { - for (int64 base_col = 0; base_col < rounded_cols; - base_col += block_size) { + if (thread_id < num_threads) { + const auto begin = thread_id * work_per_thread; + const auto end = std::min(rows, begin + work_per_thread); + + auto local_partial = identity; + if (rounded_cols == 0 || cols == block_size) { + // we group all sizes <= block_size here and unroll explicitly + constexpr auto local_cols = + remainder_cols == 0 ? block_size : remainder_cols; + for (auto row = begin; row < end; row++) { #pragma unroll - for (int64 i = 0; i < block_size; i++) { + for (int64 col = 0; col < local_cols; col++) { local_partial = - op(local_partial, fn(row, base_col + i, args...)); + op(local_partial, fn(row, col, args...)); } } + } else { + // we operate in block_size blocks plus an explicitly unrolled + // remainder + for (auto row = begin; row < end; row++) { + for (int64 base_col = 0; base_col < rounded_cols; + base_col += block_size) { #pragma unroll - for (int64 i = 0; i < remainder_cols; i++) { - local_partial = - op(local_partial, fn(row, rounded_cols + i, args...)); + for (int64 i = 0; i < block_size; i++) { + local_partial = op(local_partial, + fn(row, base_col + i, args...)); + } + } +#pragma unroll + for (int64 i = 0; i < remainder_cols; i++) { + local_partial = op(local_partial, + fn(row, rounded_cols + i, args...)); + } } } + partial[thread_id] = local_partial; } - partial[thread_id] = local_partial; } - *result = finalize(std::accumulate( - partial, partial + required_storage / sizeof(ValueType), identity, op)); + *result = + finalize(std::accumulate(partial, partial + num_threads, identity, op)); } GKO_ENABLE_IMPLEMENTATION_SELECTION(select_run_kernel_reduction_sized, @@ -232,7 +239,8 @@ void run_kernel_row_reduction_impl(std::shared_ptr exec, } else { // small number of rows and large reduction sizes: do partial sum first const auto num_threads = std::min(available_threads, cols); - const auto work_per_thread = ceildiv(cols, num_threads); + const auto work_per_thread = + ceildiv(cols, std::max(num_threads, 1)); const auto temp_elems_per_row = num_threads; const auto required_storage = sizeof(ValueType) * rows * temp_elems_per_row; @@ -243,16 +251,19 @@ void run_kernel_row_reduction_impl(std::shared_ptr exec, #pragma omp parallel num_threads(num_threads) { const auto thread_id = static_cast(omp_get_thread_num()); - const auto begin = thread_id * work_per_thread; - const auto end = std::min(begin + work_per_thread, cols); - for (int64 row = 0; row < rows; row++) { - auto local_partial = identity; - for (int64 col = begin; col < end; col++) { - local_partial = op(local_partial, [&]() { - return fn(row, col, args...); - }()); + if (thread_id < num_threads) { + const auto begin = thread_id * work_per_thread; + const auto end = std::min(begin + work_per_thread, cols); + for (int64 row = 0; row < rows; row++) { + auto local_partial = identity; + for (int64 col = begin; col < end; col++) { + local_partial = op(local_partial, [&]() { + return fn(row, col, args...); + }()); + } + partial[row * temp_elems_per_row + thread_id] = + local_partial; } - partial[row * temp_elems_per_row + thread_id] = local_partial; } } // then accumulate the partial sums and write to 
result @@ -334,8 +345,9 @@ void run_kernel_col_reduction_sized_impl( // storage than the input vector const auto reduction_size = std::min( rows, ceildiv(reduction_kernel_oversubscription * available_threads, - cols)); - const auto rows_per_thread = ceildiv(rows, reduction_size); + std::max(cols, 1))); + const auto rows_per_thread = + ceildiv(rows, std::max(reduction_size, 1)); const auto required_storage = sizeof(ValueType) * cols * reduction_size; if (tmp.get_num_elems() < required_storage) { tmp.resize_and_reset(required_storage); From 8cb678ac36cff7bdc95bd5a0605c3d794ea71343 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 2 Aug 2023 14:15:16 +0200 Subject: [PATCH 103/583] simplify size calculations --- test/base/kernel_launch_generic.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/base/kernel_launch_generic.cpp b/test/base/kernel_launch_generic.cpp index bc4119d2806..d4a0f83c819 100644 --- a/test/base/kernel_launch_generic.cpp +++ b/test/base/kernel_launch_generic.cpp @@ -382,7 +382,7 @@ void run1d_reduction_cached(std::shared_ptr exec, static_cast(size)); // The temporary storage (used for partial sums) must be smaller than // the input array - ASSERT_LE(temp.get_num_elems() / sizeof(int64), size); + ASSERT_LE(temp.get_num_elems(), size * sizeof(int64)); } } @@ -477,7 +477,7 @@ void run2d_reduction_cached(std::shared_ptr exec, static_cast(dim[0] + dim[1])); // The temporary storage (used for partial sums) must be smaller than // the input array - ASSERT_LE(temp.get_num_elems() / sizeof(int64), dim[0] * dim[1]); + ASSERT_LE(temp.get_num_elems(), dim[0] * dim[1] * sizeof(int64)); } } @@ -569,7 +569,7 @@ void run2d_row_reduction_cached(std::shared_ptr exec, GKO_ASSERT_ARRAY_EQ(host_ref, output); // The temporary storage (used for partial sums) must be smaller than // the input array - ASSERT_LE(temp.get_num_elems() / sizeof(int64), dim[0] * dim[1]); + ASSERT_LE(temp.get_num_elems(), dim[0] * dim[1] * sizeof(int64)); } } @@ -660,7 +660,7 @@ void run2d_col_reduction_cached(std::shared_ptr exec, dim, temp); GKO_ASSERT_ARRAY_EQ(host_ref, output); - ASSERT_LE(temp.get_num_elems() / sizeof(int64), dim[0] * dim[1]); + ASSERT_LE(temp.get_num_elems(), dim[0] * dim[1] * sizeof(int64)); } } From ff410a8e88707162c735e8d05968b403ff55ab21 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 30 Jun 2023 15:31:12 +0200 Subject: [PATCH 104/583] Add BatchDense class, kernels and tests. 
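
A minimal usage sketch of the new batched type (illustrative only; it relies
solely on the batch_initialize helper, the at() accessor and the unbatch()
method exercised by the tests added below, plus the existing umbrella
ginkgo.hpp header):

    #include <ginkgo/ginkgo.hpp>

    int main()
    {
        auto exec = gko::ReferenceExecutor::create();
        // Two independent 2x3 dense entries stored in a single batched object.
        auto b = gko::batch_initialize<gko::matrix::BatchDense<double>>(
            {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}},
             {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}},
            exec);
        // Entries are addressed as (batch entry, row, column).
        auto value = b->at(1, 0, 2);  // 3.0
        // Each batch entry can be extracted again as a regular Dense matrix.
        auto dense_entries = b->unbatch();
    }
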
Co-authored-by: Aditya Kashi Co-authored-by: Isha Aggarwal --- .../matrix/batch_vector_kernels.hpp.inc | 196 +++ core/CMakeLists.txt | 1 + core/matrix/batch_vector.cpp | 469 +++++++ core/matrix/batch_vector_kernels.hpp | 284 +++++ core/test/matrix/batch_dense.cpp | 520 ++++++++ cuda/CMakeLists.txt | 1 + cuda/matrix/batch_vector_kernels.cu | 434 +++++++ hip/CMakeLists.txt | 1 + hip/matrix/batch_vector_kernels.hip.cpp | 449 +++++++ include/ginkgo/core/base/dim.hpp | 186 +++ include/ginkgo/core/matrix/batch_vector.hpp | 1093 +++++++++++++++++ omp/CMakeLists.txt | 1 + omp/matrix/batch_vector_kernels.cpp | 614 +++++++++ reference/CMakeLists.txt | 1 + reference/matrix/batch_vector_kernels.cpp | 580 +++++++++ reference/matrix/batch_vector_kernels.hpp.inc | 392 ++++++ .../test/matrix/batch_vector_kernels.cpp | 1023 +++++++++++++++ test/matrix/batch_vector_kernels.cpp | 433 +++++++ 18 files changed, 6678 insertions(+) create mode 100644 common/cuda_hip/matrix/batch_vector_kernels.hpp.inc create mode 100644 core/matrix/batch_vector.cpp create mode 100644 core/matrix/batch_vector_kernels.hpp create mode 100644 core/test/matrix/batch_dense.cpp create mode 100644 cuda/matrix/batch_vector_kernels.cu create mode 100644 hip/matrix/batch_vector_kernels.hip.cpp create mode 100644 include/ginkgo/core/matrix/batch_vector.hpp create mode 100644 omp/matrix/batch_vector_kernels.cpp create mode 100644 reference/matrix/batch_vector_kernels.cpp create mode 100644 reference/matrix/batch_vector_kernels.hpp.inc create mode 100644 reference/test/matrix/batch_vector_kernels.cpp create mode 100644 test/matrix/batch_vector_kernels.cpp diff --git a/common/cuda_hip/matrix/batch_vector_kernels.hpp.inc b/common/cuda_hip/matrix/batch_vector_kernels.hpp.inc new file mode 100644 index 00000000000..0eb86996c81 --- /dev/null +++ b/common/cuda_hip/matrix/batch_vector_kernels.hpp.inc @@ -0,0 +1,196 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +/** + * Copies the values of vector into another. + * + * @param num_rows Length of vector. + * @param in Vector to copy from. + * @param out Vector to copy into. + */ +template +__device__ __forceinline__ void single_copy(const int num_rows, + const ValueType* const in, + ValueType* const out) +{ + for (int iz = threadIdx.x; iz < num_rows; iz += blockDim.x) { + out[iz] = in[iz]; + } +} + +template +__global__ __launch_bounds__(default_block_size) void single_copy( + const size_type num_batch, const int num_rows, + const ValueType* const __restrict__ in, ValueType* const __restrict__ out) +{ + for (size_type ibatch = blockIdx.x; ibatch < num_batch; + ibatch += gridDim.x) { + const auto in_b = gko::batch::batch_entry_ptr(in, 1, num_rows, ibatch); + const auto out_b = + gko::batch::batch_entry_ptr(out, 1, num_rows, ibatch); + single_copy(num_rows, in_b, out_b); + } +} + + +/** + * Adds a scaled vector to another. + * + * @param num_rows Common length of both vectors. + * @param alpha Scaling factor. + * @param[in] x Vector to scale and add. + * @param[in,out] y Vector to add to. + */ +template +__device__ __forceinline__ void single_add_scaled(const int num_rows, + const ValueType alpha, + const ValueType* const x, + ValueType* const y) +{ + for (int li = threadIdx.x; li < num_rows; li += blockDim.x) { + y[li] += alpha * x[li]; + } +} + +template +__global__ __launch_bounds__(default_block_size) void single_add_scaled( + const size_type num_batch, const int num_rows, + const ValueType* const __restrict__ alpha, + const ValueType* const __restrict__ x, ValueType* const __restrict__ y) +{ + for (size_type ibatch = blockIdx.x; ibatch < num_batch; + ibatch += gridDim.x) { + const auto x_b = gko::batch::batch_entry_ptr(x, 1, num_rows, ibatch); + const auto y_b = gko::batch::batch_entry_ptr(y, 1, num_rows, ibatch); + single_add_scaled(num_rows, alpha[0], x_b, y_b); + } +} + + +/** + * Computes the 2-norm of a vector in global or shared memory. + * + * @param x A row-major vector (only 1 column). + * @param result Norm value. + */ +template +__device__ __forceinline__ void single_compute_norm2( + group::thread_block_tile& warp_grp, const int num_rows, + const ValueType* const x, remove_complex& result) +{ + using real_type = typename gko::remove_complex; + real_type val = zero(); + + for (int r = warp_grp.thread_rank(); r < num_rows; r += warp_grp.size()) { + val += squared_norm(x[r]); + } + + // warp level reduction +#pragma unroll + for (int j = config::warp_size / 2; j > 0; j /= 2) { + val += warp_grp.shfl_down(val, j); + } + + if (warp_grp.thread_rank() == 0) { + result = sqrt(val); + } +} + + +template +__global__ __launch_bounds__(default_block_size) void single_compute_norm2( + const size_type num_batch, const int num_rows, + const ValueType* const __restrict__ x, + remove_complex* const __restrict__ result) +{ + auto warp_grp = + group::tiled_partition(group::this_thread_block()); + for (size_type ibatch = blockIdx.x; ibatch < num_batch; + ibatch += gridDim.x) { + const auto x_b = gko::batch::batch_entry_ptr(x, 1, num_rows, ibatch); + const auto r_b = gko::batch::batch_entry_ptr(result, 1, 1, ibatch); + if (threadIdx.x / config::warp_size == 0) { + single_compute_norm2(warp_grp, num_rows, x_b, r_b[0]); + } + } +} + + +/** + * Computes the dot product of some column vectors in global or shared memory. + * + * @param result Holds dot product value for vector in x and y. 
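+ * @param warp_grp Warp-tile group whose threads cooperate on the reduction.
+ * @param num_rows Common length of both vectors.
+ * @param x First vector; its entries are conjugated in the product.
+ * @param y Second vector.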
+ */ +template +__device__ __forceinline__ void single_compute_dot_product( + group::thread_block_tile& warp_grp, const int num_rows, + const ValueType* const x, const ValueType* const y, ValueType& result) +{ + ValueType val = zero(); + + for (int r = warp_grp.thread_rank(); r < num_rows; r += warp_grp.size()) { + val += conj(x[r]) * y[r]; + } + + // warp level reduction +#pragma unroll + for (int j = config::warp_size / 2; j > 0; j /= 2) { + val += warp_grp.shfl_down(val, j); + } + + if (warp_grp.thread_rank() == 0) { + result = val; + } +} + + +// clang-format off +template +__global__ __launch_bounds__(default_block_size) +void single_compute_dot_product(const size_type num_batch, + const int num_rows, + const ValueType *const __restrict__ x, + const ValueType *const __restrict__ y, + ValueType *const __restrict__ result) +// clang-format on +{ + auto warp_grp = + group::tiled_partition(group::this_thread_block()); + for (size_type ibatch = blockIdx.x; ibatch < num_batch; + ibatch += gridDim.x) { + const auto x_b = gko::batch::batch_entry_ptr(x, 1, num_rows, ibatch); + const auto y_b = gko::batch::batch_entry_ptr(y, 1, num_rows, ibatch); + const auto r_b = gko::batch::batch_entry_ptr(result, 1, 1, ibatch); + single_compute_dot_product(warp_grp, num_rows, x_b, y_b, r_b[0]); + } +} diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 49cf89b66d6..03d558562dc 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -38,6 +38,7 @@ target_sources(ginkgo log/vtune.cpp log/record.cpp log/stream.cpp + matrix/batch_vector.cpp matrix/coo.cpp matrix/csr.cpp matrix/dense.cpp diff --git a/core/matrix/batch_vector.cpp b/core/matrix/batch_vector.cpp new file mode 100644 index 00000000000..4449516d5a1 --- /dev/null +++ b/core/matrix/batch_vector.cpp @@ -0,0 +1,469 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_dense_kernels.hpp" + + +namespace gko { +namespace matrix { +namespace batch_dense { + + +GKO_REGISTER_OPERATION(simple_apply, batch_dense::simple_apply); +GKO_REGISTER_OPERATION(apply, batch_dense::apply); +GKO_REGISTER_OPERATION(scale, batch_dense::scale); +GKO_REGISTER_OPERATION(add_scaled, batch_dense::add_scaled); +GKO_REGISTER_OPERATION(add_scale, batch_dense::add_scale); +GKO_REGISTER_OPERATION(convergence_add_scaled, + batch_dense::convergence_add_scaled); +GKO_REGISTER_OPERATION(add_scaled_diag, batch_dense::add_scaled_diag); +GKO_REGISTER_OPERATION(compute_dot, batch_dense::compute_dot); +GKO_REGISTER_OPERATION(convergence_compute_dot, + batch_dense::convergence_compute_dot); +GKO_REGISTER_OPERATION(compute_norm2, batch_dense::compute_norm2); +GKO_REGISTER_OPERATION(convergence_compute_norm2, + batch_dense::convergence_compute_norm2); +GKO_REGISTER_OPERATION(copy, batch_dense::copy); +GKO_REGISTER_OPERATION(convergence_copy, batch_dense::convergence_copy); +GKO_REGISTER_OPERATION(convert_to_batch_csr, batch_dense::convert_to_batch_csr); +GKO_REGISTER_OPERATION(count_nonzeros, batch_dense::count_nonzeros); +GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, + batch_dense::calculate_max_nnz_per_row); +GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, + batch_dense::calculate_nonzeros_per_row); +GKO_REGISTER_OPERATION(calculate_total_cols, batch_dense::calculate_total_cols); +GKO_REGISTER_OPERATION(transpose, batch_dense::transpose); +GKO_REGISTER_OPERATION(conj_transpose, batch_dense::conj_transpose); +GKO_REGISTER_OPERATION(add_scaled_identity, batch_dense::add_scaled_identity); + + +} // namespace batch_dense + + +template +void BatchDense::apply_impl(const BatchLinOp* b, BatchLinOp* x) const +{ + // TODO: Remove this when non-uniform batching kernels have been + // implemented + if (!this->get_size().stores_equal_sizes() || + !this->get_stride().stores_equal_strides()) { + GKO_NOT_IMPLEMENTED; + } + this->get_executor()->run(batch_dense::make_simple_apply( + this, as>(b), as>(x))); +} + + +template +void BatchDense::apply_impl(const BatchLinOp* alpha, + const BatchLinOp* b, + const BatchLinOp* beta, + BatchLinOp* x) const +{ + if (!this->get_size().stores_equal_sizes() || + !this->get_stride().stores_equal_strides()) { + GKO_NOT_IMPLEMENTED; + } + if (auto bid = dynamic_cast*>(b)) { + if (auto xdense = dynamic_cast*>(x)) { + xdense->add_scale(alpha, this, beta); + } else { + GKO_NOT_SUPPORTED(x); + } + } else { + this->get_executor()->run(batch_dense::make_apply( + as>(alpha), this, + as>(b), as>(beta), + as>(x))); + } +} + + +template +void BatchDense::scale_impl(const BatchLinOp* alpha) +{ + auto batch_alpha = as>(alpha); + GKO_ASSERT_BATCH_EQUAL_ROWS( + batch_alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); + for (size_type b = 0; b < batch_alpha->get_num_batch_entries(); ++b) { + if (batch_alpha->get_size().at(b)[1] != 1) { + // different alpha for each column + GKO_ASSERT_BATCH_EQUAL_COLS(this, batch_alpha); + } + } + auto exec = this->get_executor(); + exec->run(batch_dense::make_scale(batch_alpha, this)); +} + + +template +void BatchDense::add_scaled_impl(const BatchLinOp* alpha, + const BatchLinOp* b) +{ + auto batch_alpha = as>(alpha); + auto batch_b = as>(b); + GKO_ASSERT_BATCH_EQUAL_ROWS( + batch_alpha, 
batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); + for (size_type b = 0; b < batch_alpha->get_num_batch_entries(); ++b) { + if (batch_alpha->get_size().at(b)[1] != 1) { + // different alpha for each column + GKO_ASSERT_BATCH_EQUAL_COLS(this, batch_alpha); + } + } + GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, batch_b); + auto exec = this->get_executor(); + + exec->run(batch_dense::make_add_scaled(batch_alpha, batch_b, this)); +} + + +template +void BatchDense::add_scale(const BatchLinOp* const alpha, + const BatchLinOp* const a, + const BatchLinOp* const beta) +{ + auto batch_alpha = as>(alpha); + auto batch_beta = as>(beta); + auto batch_a = as>(a); + GKO_ASSERT_BATCH_EQUAL_ROWS( + batch_alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); + if (batch_alpha->get_size().stores_equal_sizes()) { + if (batch_alpha->get_size().at(0)[1] != 1) { + // different alpha for each column + GKO_ASSERT_BATCH_EQUAL_COLS(this, batch_alpha); + } + } else { + for (size_type b = 0; b < batch_alpha->get_num_batch_entries(); ++b) { + if (batch_alpha->get_size().at(b)[1] != 1) { + GKO_ASSERT(this->get_size().at(b)[1] == + batch_alpha->get_size().at(b)[1]); + } + } + } + GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, batch_a); + GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_alpha, batch_beta); + this->get_executor()->run( + batch_dense::make_add_scale(batch_alpha, batch_a, batch_beta, this)); +} + + +inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) +{ + auto col_sizes = std::vector>(sizes.get_num_batch_entries()); + for (size_type i = 0; i < col_sizes.size(); ++i) { + col_sizes[i] = dim<2>(1, sizes.at(i)[1]); + } + return batch_dim<2>(col_sizes); +} + + +template +void BatchDense::compute_dot_impl(const BatchLinOp* b, + BatchLinOp* result) const +{ + auto batch_result = as>(result); + auto batch_b = as>(b); + GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, batch_b); + GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_result, + get_col_sizes(this->get_size())); + auto exec = this->get_executor(); + exec->run(batch_dense::make_compute_dot(this, batch_b, batch_result)); +} + + +template +void BatchDense::compute_norm2_impl(BatchLinOp* result) const +{ + using NormVector = BatchDense>; + auto batch_result = as(result); + GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_result, + get_col_sizes(this->get_size())); + auto exec = this->get_executor(); + exec->run(batch_dense::make_compute_norm2(as>(this), + batch_result)); +} + + +template +void BatchDense::convert_to( + BatchDense>* result) const +{ + result->values_ = this->values_; + result->stride_ = this->stride_; + result->num_elems_per_batch_cumul_ = this->num_elems_per_batch_cumul_; + result->set_size(this->get_size()); +} + + +template +void BatchDense::move_to( + BatchDense>* result) +{ + this->convert_to(result); +} + + +template +void BatchDense::convert_to(BatchCsr* result) const +{ + auto exec = this->get_executor(); + + auto batch_size = this->get_size(); + if (!batch_size.stores_equal_sizes()) { + GKO_NOT_IMPLEMENTED; + } + + auto num_stored_nonzeros = + array{exec->get_master(), this->get_num_batch_entries()}; + exec->run( + batch_dense::make_count_nonzeros(this, num_stored_nonzeros.get_data())); + gko::dim<2> main_size = this->get_size().at(0); + const size_type num_nnz = + num_stored_nonzeros.get_data() ? 
num_stored_nonzeros.get_data()[0] : 0; + auto tmp = BatchCsr::create( + exec, this->get_num_batch_entries(), main_size, num_nnz); + exec->run(batch_dense::make_convert_to_batch_csr(this, tmp.get())); + tmp->move_to(result); +} + + +template +void BatchDense::move_to(BatchCsr* result) +{ + this->convert_to(result); +} + + +template +void BatchDense::convert_to( + BatchDiagonal* const result) const +{ + auto exec = this->get_executor(); + + auto batch_size = this->get_size(); + if (!batch_size.stores_equal_sizes()) { + GKO_NOT_IMPLEMENTED; + } + GKO_ASSERT_BATCH_HAS_SINGLE_COLUMN(this); + if (this->get_stride().at(0) != 1) { + GKO_NOT_IMPLEMENTED; + } + auto temp = BatchDiagonal::create( + exec, batch_dim<2>{batch_size.get_num_batch_entries(), + dim<2>{batch_size.at(0)[0]}}); + exec->copy(this->get_num_stored_elements(), this->get_const_values(), + temp->get_values()); + result->copy_from(temp.get()); +} + + +template +void BatchDense::move_to(BatchDiagonal* const result) +{ + auto exec = this->get_executor(); + + auto batch_size = this->get_size(); + if (!batch_size.stores_equal_sizes()) { + GKO_NOT_IMPLEMENTED; + } + GKO_ASSERT_BATCH_HAS_SINGLE_COLUMN(this); + if (this->get_stride().at(0) != 1) { + GKO_NOT_IMPLEMENTED; + } + auto temp = BatchDiagonal::create( + exec, + batch_dim<2>{batch_size.get_num_batch_entries(), + dim<2>{batch_size.at(0)[0]}}, + std::move(this->values_)); + *result = std::move(*temp); + // set the size of this to 0 + this->set_size(batch_dim<2>()); +} + + +namespace { + + +template +inline void read_impl(MatrixType* mtx, const std::vector& data) +{ + auto batch_sizes = std::vector>(data.size()); + size_type ind = 0; + for (const auto& b : data) { + batch_sizes[ind] = b.size; + ++ind; + } + auto tmp = MatrixType::create(mtx->get_executor()->get_master(), + batch_dim<2>(batch_sizes)); + for (size_type b = 0; b < data.size(); ++b) { + size_type ind = 0; + for (size_type row = 0; row < data[b].size[0]; ++row) { + for (size_type col = 0; col < data[b].size[1]; ++col) { + if (ind < data[b].nonzeros.size() && + data[b].nonzeros[ind].row == row && + data[b].nonzeros[ind].column == col) { + tmp->at(b, row, col) = data[b].nonzeros[ind].value; + ++ind; + } else { + tmp->at(b, row, col) = + zero(); + } + } + } + } + tmp->move_to(mtx); +} + + +} // namespace + + +template +void BatchDense::read(const std::vector& data) +{ + read_impl(this, data); +} + + +template +void BatchDense::read(const std::vector& data) +{ + read_impl(this, data); +} + + +namespace { + + +template +inline void write_impl(const MatrixType* mtx, std::vector& data) +{ + std::unique_ptr op{}; + const MatrixType* tmp{}; + if (mtx->get_executor()->get_master() != mtx->get_executor()) { + op = mtx->clone(mtx->get_executor()->get_master()); + tmp = static_cast(op.get()); + } else { + tmp = mtx; + } + + data = std::vector(mtx->get_num_batch_entries()); + for (size_type b = 0; b < mtx->get_num_batch_entries(); ++b) { + data[b] = {mtx->get_size().at(b), {}}; + for (size_type row = 0; row < data[b].size[0]; ++row) { + for (size_type col = 0; col < data[b].size[1]; ++col) { + if (tmp->at(b, row, col) != + zero()) { + data[b].nonzeros.emplace_back(row, col, + tmp->at(b, row, col)); + } + } + } + } +} + + +} // namespace + + +template +void BatchDense::write(std::vector& data) const +{ + write_impl(this, data); +} + + +template +void BatchDense::write(std::vector& data) const +{ + write_impl(this, data); +} + + +template +std::unique_ptr BatchDense::transpose() const +{ + auto exec = this->get_executor(); + auto trans_cpy 
= BatchDense::create(exec, gko::transpose(this->get_size())); + + exec->run(batch_dense::make_transpose(this, trans_cpy.get())); + + return std::move(trans_cpy); +} + + +template +std::unique_ptr BatchDense::conj_transpose() const +{ + auto exec = this->get_executor(); + auto trans_cpy = BatchDense::create(exec, gko::transpose(this->get_size())); + + exec->run(batch_dense::make_conj_transpose(this, trans_cpy.get())); + return std::move(trans_cpy); +} + + +template +void BatchDense::add_scaled_identity_impl(const BatchLinOp* const a, + const BatchLinOp* const b) +{ + this->get_executor()->run(batch_dense::make_add_scaled_identity( + as>(a), as>(b), this)); +} + + +#define GKO_DECLARE_BATCH_DENSE_MATRIX(_type) class BatchDense<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_MATRIX); + + +} // namespace matrix + + +} // namespace gko diff --git a/core/matrix/batch_vector_kernels.hpp b/core/matrix/batch_vector_kernels.hpp new file mode 100644 index 00000000000..91dd3e6f5b7 --- /dev/null +++ b/core/matrix/batch_vector_kernels.hpp @@ -0,0 +1,284 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ +#define GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ + + +#include + + +#include +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(_type) \ + void simple_apply(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* a, \ + const matrix::BatchDense<_type>* b, \ + matrix::BatchDense<_type>* c) + +#define GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL(_type) \ + void apply(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* alpha, \ + const matrix::BatchDense<_type>* a, \ + const matrix::BatchDense<_type>* b, \ + const matrix::BatchDense<_type>* beta, \ + matrix::BatchDense<_type>* c) + +#define GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL(_type) \ + void scale(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* alpha, \ + matrix::BatchDense<_type>* x) + +#define GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL(_type) \ + void add_scaled(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* alpha, \ + const matrix::BatchDense<_type>* x, \ + matrix::BatchDense<_type>* y) + +#define GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL(_type) \ + void add_scale(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* alpha, \ + const matrix::BatchDense<_type>* x, \ + const matrix::BatchDense<_type>* beta, \ + matrix::BatchDense<_type>* y) + +#define GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL(_type) \ + void convergence_add_scaled(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* alpha, \ + const matrix::BatchDense<_type>* x, \ + matrix::BatchDense<_type>* y, \ + const uint32& converged) + +#define GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL(_type) \ + void add_scaled_diag(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* alpha, \ + const matrix::Diagonal<_type>* x, \ + matrix::BatchDense<_type>* y) + +#define GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL(_type) \ + void compute_dot(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* x, \ + const matrix::BatchDense<_type>* y, \ + matrix::BatchDense<_type>* result) + + +#define GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL(_type) \ + void convergence_compute_dot(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* x, \ + const matrix::BatchDense<_type>* y, \ + matrix::BatchDense<_type>* result, \ + const uint32& converged) + +#define GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL(_type) \ + void compute_norm2(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* x, \ + matrix::BatchDense>* result) + +#define GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL(_type) \ + void convergence_compute_norm2( \ + std::shared_ptr exec, \ + const matrix::BatchDense<_type>* x, \ + matrix::BatchDense>* result, \ + const uint32& converged) + + +#define GKO_DECLARE_BATCH_DENSE_COPY_KERNEL(_type) \ + void copy(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* x, \ + matrix::BatchDense<_type>* result) + +#define GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL(_type) \ + void convergence_copy(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* x, \ + matrix::BatchDense<_type>* result, \ + const uint32& converged) + +#define GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL(_type, _prec) \ + void convert_to_batch_csr(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* source, \ + matrix::BatchCsr<_type, _prec>* other) + +#define GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL(_type) \ + void 
count_nonzeros(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* source, \ + size_type* result) + +#define GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(_type) \ + void calculate_max_nnz_per_row( \ + std::shared_ptr exec, \ + const matrix::BatchDense<_type>* source, size_type* result) + +#define GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL(_type) \ + void calculate_nonzeros_per_row( \ + std::shared_ptr exec, \ + const matrix::BatchDense<_type>* source, array* result) + +#define GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL(_type) \ + void calculate_total_cols( \ + std::shared_ptr exec, \ + const matrix::BatchDense<_type>* source, size_type* result, \ + const size_type* stride_factor, const size_type* slice_size) + +#define GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL(_type) \ + void transpose(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* orig, \ + matrix::BatchDense<_type>* trans) + +#define GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL(_type) \ + void conj_transpose(std::shared_ptr exec, \ + const matrix::BatchDense<_type>* orig, \ + matrix::BatchDense<_type>* trans) + +#define GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL(ValueType) \ + void batch_scale(std::shared_ptr exec, \ + const matrix::BatchDiagonal* left_scale, \ + const matrix::BatchDiagonal* right_scale, \ + matrix::BatchDense* vec_to_scale) + +#define GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL(ValueType) \ + void add_scaled_identity(std::shared_ptr exec, \ + const matrix::BatchDense* a, \ + const matrix::BatchDense* b, \ + matrix::BatchDense* mtx) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_COPY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL(ValueType) + + +namespace omp { +namespace batch_dense { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // 
namespace batch_dense +} // namespace omp + + +namespace cuda { +namespace batch_dense { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace batch_dense +} // namespace cuda + + +namespace reference { +namespace batch_dense { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace batch_dense +} // namespace reference + + +namespace hip { +namespace batch_dense { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace batch_dense +} // namespace hip + + +namespace dpcpp { +namespace batch_dense { + +GKO_DECLARE_ALL_AS_TEMPLATES; + +} // namespace batch_dense +} // namespace dpcpp + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp new file mode 100644 index 00000000000..7db7469baf6 --- /dev/null +++ b/core/test/matrix/batch_dense.cpp @@ -0,0 +1,520 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class BatchDense : public ::testing::Test { +protected: + using value_type = T; + using DenseMtx = gko::matrix::Dense; + using size_type = gko::size_type; + BatchDense() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::batch_initialize>( + std::vector{4, 3}, + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)) + {} + + + static void assert_equal_to_original_mtx( + gko::matrix::BatchDense* m) + { + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_stride().at(0), 4); + ASSERT_EQ(m->get_stride().at(1), 3); + ASSERT_EQ(m->get_num_stored_elements(), (2 * 4) + (2 * 3)); + ASSERT_EQ(m->get_num_stored_elements(0), 2 * 4); + ASSERT_EQ(m->get_num_stored_elements(1), 2 * 3); + EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{-1.5}); + EXPECT_EQ(m->at(0, 1, 1), value_type{2.5}); + ASSERT_EQ(m->at(0, 1, 2), value_type{3.5}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.5}); + EXPECT_EQ(m->at(1, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(1, 1, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{2.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); + } + + static void assert_empty(gko::matrix::BatchDense* m) + { + ASSERT_EQ(m->get_num_batch_entries(), 0); + ASSERT_EQ(m->get_num_stored_elements(), 0); + } + + std::shared_ptr exec; + std::unique_ptr> mtx; +}; + +TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); + + +TYPED_TEST(BatchDense, CanBeEmpty) +{ + auto empty = gko::matrix::BatchDense::create(this->exec); + this->assert_empty(empty.get()); +} + + +TYPED_TEST(BatchDense, ReturnsNullValuesArrayWhenEmpty) +{ + auto empty = gko::matrix::BatchDense::create(this->exec); + ASSERT_EQ(empty->get_const_values(), nullptr); +} + + +TYPED_TEST(BatchDense, CanBeConstructedWithSize) +{ + using size_type = gko::size_type; + auto m = gko::matrix::BatchDense::create( + this->exec, + std::vector>{gko::dim<2>{2, 4}, gko::dim<2>{2, 3}}); + + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 4)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 3)); + EXPECT_EQ(m->get_stride().at(0), 4); + EXPECT_EQ(m->get_stride().at(1), 3); + ASSERT_EQ(m->get_num_stored_elements(), 14); + ASSERT_EQ(m->get_num_stored_elements(0), 8); + ASSERT_EQ(m->get_num_stored_elements(1), 6); +} + + +TYPED_TEST(BatchDense, CanBeConstructedWithSizeAndStride) +{ + using size_type = gko::size_type; + auto m = gko::matrix::BatchDense::create( + this->exec, std::vector>{gko::dim<2>{2, 3}}, + std::vector{4}); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + EXPECT_EQ(m->get_stride().at(0), 4); + ASSERT_EQ(m->get_num_stored_elements(), 8); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + value_type data[] = { + 1.0, 2.0, -1.0, + 3.0, 4.0, -1.0, + 3.0, 5.0, 1.0, + 5.0, 6.0, -3.0}; + // clang-format on + + auto m = gko::matrix::BatchDense::create( + this->exec, + std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, + gko::array::view(this->exec, 12, data), + 
std::vector{3, 3}); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + const value_type data[] = { + 1.0, 2.0, -1.0, + 3.0, 4.0, -1.0, + 3.0, 5.0, 1.0, + 5.0, 6.0, -3.0}; + // clang-format on + + auto m = gko::matrix::BatchDense::create_const( + this->exec, + std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, + gko::array::const_view(this->exec, 12, data), + std::vector{3, 3}); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromBatchDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 3, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto m = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); + auto m2 = + gko::matrix::BatchDense::create(this->exec, 3, m.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto bat_m = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = + gko::matrix::BatchDense::create(this->exec, 3, mat1.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto m = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat2.get()}); + + this->assert_equal_to_original_mtx(m.get()); +} + + +TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto dense_mats = this->mtx->unbatch(); + + + GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); + GKO_ASSERT_MTX_NEAR(dense_mats[1].get(), mat2.get(), 0.); +} + + +TYPED_TEST(BatchDense, KnowsItsSizeAndValues) +{ + 
this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(BatchDense, CanBeListConstructed) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( + {{1.0, 2.0}, {1.0, 3.0}}, this->exec); + + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 4); + EXPECT_EQ(m->at(0, 0), value_type{1}); + EXPECT_EQ(m->at(0, 1), value_type{2}); + EXPECT_EQ(m->at(1, 0), value_type{1}); + EXPECT_EQ(m->at(1, 1), value_type{3}); +} + + +TYPED_TEST(BatchDense, CanBeListConstructedWithstride) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( + std::vector{2}, {{1.0, 2.0}}, this->exec); + ASSERT_EQ(m->get_num_batch_entries(), 1); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 4); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{2.0}); +} + + +TYPED_TEST(BatchDense, CanBeListConstructedByCopies) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( + 2, I({1.0, 2.0}), this->exec); + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 4); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.0}); +} + + +TYPED_TEST(BatchDense, CanBeDoubleListConstructed) +{ + using value_type = typename TestFixture::value_type; + using T = value_type; + auto m = gko::batch_initialize>( + {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, + {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, + this->exec); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); + ASSERT_EQ(m->get_stride().at(0), 3); + ASSERT_EQ(m->get_stride().at(1), 2); + EXPECT_EQ(m->get_num_stored_elements(), 15); + ASSERT_EQ(m->get_num_stored_elements(0), 9); + ASSERT_EQ(m->get_num_stored_elements(1), 6); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{1.0}); + EXPECT_EQ(m->at(0, 2), value_type{0.0}); + ASSERT_EQ(m->at(0, 3), value_type{2.0}); + EXPECT_EQ(m->at(0, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 2), value_type{3.0}); + ASSERT_EQ(m->at(1, 3), value_type{4.0}); + EXPECT_EQ(m->at(1, 4), value_type{5.0}); +} + + +TYPED_TEST(BatchDense, CanBeDoubleListConstructedWithstride) +{ + using value_type = typename TestFixture::value_type; + using T = value_type; + auto m = gko::batch_initialize>( + {4, 3}, + {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, + {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, + this->exec); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); + ASSERT_EQ(m->get_stride().at(0), 4); + ASSERT_EQ(m->get_stride().at(1), 3); + EXPECT_EQ(m->get_num_stored_elements(), 21); + ASSERT_EQ(m->get_num_stored_elements(0), 12); + ASSERT_EQ(m->get_num_stored_elements(1), 9); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{1.0}); + EXPECT_EQ(m->at(0, 2), value_type{0.0}); + ASSERT_EQ(m->at(0, 3), value_type{2.0}); + EXPECT_EQ(m->at(0, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 0), value_type{1.0}); + 
    EXPECT_EQ(m->at(1, 1), value_type{2.0});
+    EXPECT_EQ(m->at(1, 2), value_type{3.0});
+    ASSERT_EQ(m->at(1, 3), value_type{4.0});
+    EXPECT_EQ(m->at(1, 4), value_type{5.0});
+}
+
+
+TYPED_TEST(BatchDense, CanBeCopied)
+{
+    auto mtx_copy = gko::matrix::BatchDense<TypeParam>::create(this->exec);
+    mtx_copy->copy_from(this->mtx.get());
+    this->assert_equal_to_original_mtx(this->mtx.get());
+    this->mtx->at(0, 0, 0) = 7;
+    this->mtx->at(0, 1) = 7;
+    this->assert_equal_to_original_mtx(mtx_copy.get());
+}
+
+
+TYPED_TEST(BatchDense, CanBeMoved)
+{
+    auto mtx_copy = gko::matrix::BatchDense<TypeParam>::create(this->exec);
+    mtx_copy->copy_from(std::move(this->mtx));
+    this->assert_equal_to_original_mtx(mtx_copy.get());
+}
+
+
+TYPED_TEST(BatchDense, CanBeCloned)
+{
+    auto mtx_clone = this->mtx->clone();
+    this->assert_equal_to_original_mtx(
+        dynamic_cast<decltype(this->mtx.get())>(mtx_clone.get()));
+}
+
+
+TYPED_TEST(BatchDense, CanBeCleared)
+{
+    this->mtx->clear();
+    this->assert_empty(this->mtx.get());
+}
+
+
+TYPED_TEST(BatchDense, CanBeReadFromMatrixData)
+{
+    using value_type = typename TestFixture::value_type;
+    auto m = gko::matrix::BatchDense<value_type>::create(this->exec);
+    // clang-format off
+    m->read({gko::matrix_data<value_type>{{2, 3},
+                                          {{0, 0, 1.0},
+                                           {0, 1, 3.0},
+                                           {0, 2, 2.0},
+                                           {1, 0, 0.0},
+                                           {1, 1, 5.0},
+                                           {1, 2, 0.0}}},
+             gko::matrix_data<value_type>{{2, 2},
+                                          {{0, 0, -1.0},
+                                           {0, 1, 0.5},
+                                           {1, 0, 0.0},
+                                           {1, 1, 9.0}}}});
+    // clang-format on
+
+    ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3));
+    ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 2));
+    ASSERT_EQ(m->get_num_stored_elements(), 10);
+    ASSERT_EQ(m->get_num_stored_elements(0), 6);
+    ASSERT_EQ(m->get_num_stored_elements(1), 4);
+    EXPECT_EQ(m->at(0, 0, 0), value_type{1.0});
+    EXPECT_EQ(m->at(0, 1, 0), value_type{0.0});
+    EXPECT_EQ(m->at(0, 0, 1), value_type{3.0});
+    EXPECT_EQ(m->at(0, 1, 1), value_type{5.0});
+    EXPECT_EQ(m->at(0, 0, 2), value_type{2.0});
+    EXPECT_EQ(m->at(0, 1, 2), value_type{0.0});
+    EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0});
+    EXPECT_EQ(m->at(1, 0, 1), value_type{0.5});
+    EXPECT_EQ(m->at(1, 1, 0), value_type{0.0});
+    EXPECT_EQ(m->at(1, 1, 1), value_type{9.0});
+}
+
+
+TYPED_TEST(BatchDense, GeneratesCorrectMatrixData)
+{
+    using value_type = typename TestFixture::value_type;
+    using tpl = typename gko::matrix_data<value_type>::nonzero_type;
+    std::vector<gko::matrix_data<value_type>> data;
+
+    this->mtx->write(data);
+
+    ASSERT_EQ(data[0].size, gko::dim<2>(2, 3));
+    ASSERT_EQ(data[0].nonzeros.size(), 6);
+    EXPECT_EQ(data[0].nonzeros[0], tpl(0, 0, value_type{-1.0}));
+    EXPECT_EQ(data[0].nonzeros[1], tpl(0, 1, value_type{2.0}));
+    EXPECT_EQ(data[0].nonzeros[2], tpl(0, 2, value_type{3.0}));
+    EXPECT_EQ(data[0].nonzeros[3], tpl(1, 0, value_type{-1.5}));
+    EXPECT_EQ(data[0].nonzeros[4], tpl(1, 1, value_type{2.5}));
+    EXPECT_EQ(data[0].nonzeros[5], tpl(1, 2, value_type{3.5}));
+    ASSERT_EQ(data[1].size, gko::dim<2>(2, 3));
+    ASSERT_EQ(data[1].nonzeros.size(), 6);
+    EXPECT_EQ(data[1].nonzeros[0], tpl(0, 0, value_type{1.0}));
+    EXPECT_EQ(data[1].nonzeros[1], tpl(0, 1, value_type{2.5}));
+    EXPECT_EQ(data[1].nonzeros[2], tpl(0, 2, value_type{3.0}));
+    EXPECT_EQ(data[1].nonzeros[3], tpl(1, 0, value_type{1.0}));
+    EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0}));
+    EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0}));
+}
+
+
+TYPED_TEST(BatchDense, CanBeReadFromMatrixAssemblyData)
+{
+    using value_type = typename TestFixture::value_type;
+    auto m = gko::matrix::BatchDense<value_type>::create(this->exec);
+    gko::matrix_assembly_data<value_type> data1(gko::dim<2>{2, 3});
+    data1.set_value(0, 0, 1.0);
+    data1.set_value(0, 1, 3.0);
+
data1.set_value(0, 2, 2.0); + data1.set_value(1, 0, 0.0); + data1.set_value(1, 1, 5.0); + data1.set_value(1, 2, 0.0); + gko::matrix_assembly_data data2(gko::dim<2>{2, 1}); + data2.set_value(0, 0, 2.0); + data2.set_value(1, 0, 5.0); + auto data = std::vector>{data1, data2}; + + m->read(data); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 8); + ASSERT_EQ(m->get_num_stored_elements(0), 6); + ASSERT_EQ(m->get_num_stored_elements(1), 2); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{0.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); + EXPECT_EQ(m->at(1, 1, 0), value_type{5.0}); +} + + +} // namespace diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 37d56e5855f..d630fb9a92a 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -35,6 +35,7 @@ target_sources(ginkgo_cuda factorization/par_ilut_select_kernel.cu factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu + matrix/batch_vector_kernels.cu matrix/coo_kernels.cu ${CSR_INSTANTIATE} matrix/dense_kernels.cu diff --git a/cuda/matrix/batch_vector_kernels.cu b/cuda/matrix/batch_vector_kernels.cu new file mode 100644 index 00000000000..af67fa1597a --- /dev/null +++ b/cuda/matrix/batch_vector_kernels.cu @@ -0,0 +1,434 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include +#include +#include + + +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/cublas_bindings.hpp" +#include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" +#include "cuda/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The BatchDense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +constexpr auto default_block_size = 256; +constexpr int sm_multiplier = 4; + + +#include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" +#include "common/cuda_hip/matrix/batch_vector_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const matrix::BatchDense* a, + const matrix::BatchDense* b, + matrix::BatchDense* c) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto a_ub = get_batch_struct(a); + const auto b_ub = get_batch_struct(b); + const auto c_ub = get_batch_struct(c); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + mv<<>>(a_ub, b_ub, c_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void apply(std::shared_ptr exec, + const matrix::BatchDense* alpha, + const matrix::BatchDense* a, + const matrix::BatchDense* b, + const matrix::BatchDense* beta, + matrix::BatchDense* c) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto a_ub = get_batch_struct(a); + const auto b_ub = get_batch_struct(b); + const auto c_ub = get_batch_struct(c); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + advanced_mv<<>>(alpha_ub, a_ub, b_ub, + beta_ub, c_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL); + + +template +void scale(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + matrix::BatchDense* const x) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + scale<<>>(alpha_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL); + + +template +void add_scaled(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const x, + matrix::BatchDense* const y) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const size_type nrhs = x->get_size().at(0)[1]; + if (nrhs == 1) { + const auto num_batch = x->get_num_batch_entries(); + const auto num_rows = x->get_size().at(0)[0]; + single_add_scaled<<>>( + num_batch, num_rows, as_cuda_type(alpha->get_const_values()), + as_cuda_type(x->get_const_values()), as_cuda_type(y->get_values())); + } else { + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + add_scaled<<>>(alpha_ub, x_ub, y_ub); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL); + + +template +void add_scale(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const x, + const matrix::BatchDense* const beta, + 
matrix::BatchDense* const y) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const size_type nrhs = x->get_size().at(0)[1]; + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + add_scale<<>>(alpha_ub, x_ub, beta_ub, + y_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL); + + +template +void convergence_add_scaled(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const x, + matrix::BatchDense* const y, + const uint32& converged) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL); + + +template +void add_scaled_diag(std::shared_ptr exec, + const matrix::BatchDense* alpha, + const matrix::Diagonal* x, + matrix::BatchDense* y) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL); + + +template +void compute_dot(std::shared_ptr exec, + const matrix::BatchDense* x, + const matrix::BatchDense* y, + matrix::BatchDense* result) +{ + const auto num_blocks = x->get_num_batch_entries(); + const auto num_rhs = x->get_size().at()[1]; + if (num_rhs == 1) { + const auto num_rows = x->get_size().at()[0]; + single_compute_dot_product<<>>( + num_blocks, num_rows, as_cuda_type(x->get_const_values()), + as_cuda_type(y->get_const_values()), + as_cuda_type(result->get_values())); + } else { + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + const auto res_ub = get_batch_struct(result); + compute_dot_product<<>>(x_ub, y_ub, + res_ub); + } +} + + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL); + + +template +void convergence_compute_dot(std::shared_ptr exec, + const matrix::BatchDense* x, + const matrix::BatchDense* y, + matrix::BatchDense* result, + const uint32& converged) GKO_NOT_IMPLEMENTED; + + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL); + + +template +void compute_norm2(std::shared_ptr exec, + const matrix::BatchDense* const x, + matrix::BatchDense>* const result) +{ + const auto num_blocks = x->get_num_batch_entries(); + const auto num_rhs = x->get_size().at()[1]; + if (num_rhs == 1) { + const auto num_rows = x->get_size().at()[0]; + single_compute_norm2<<>>( + num_blocks, num_rows, as_cuda_type(x->get_const_values()), + as_cuda_type(result->get_values())); + } else { + const auto x_ub = get_batch_struct(x); + const auto res_ub = get_batch_struct(result); + compute_norm2<<>>(x_ub, res_ub); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL); + + +template +void convergence_compute_norm2( + std::shared_ptr exec, + const matrix::BatchDense* const x, + matrix::BatchDense>* const result, + const uint32& converged) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL); + + +template +void convert_to_batch_csr(std::shared_ptr exec, + const matrix::BatchDense* source, + matrix::BatchCsr* other) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL); + + +template +void count_nonzeros(std::shared_ptr exec, + const matrix::BatchDense* source, + size_type* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + 
GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL); + + +template +void calculate_max_nnz_per_row(std::shared_ptr exec, + const matrix::BatchDense* source, + size_type* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row(std::shared_ptr exec, + const matrix::BatchDense* source, + array* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void calculate_total_cols(std::shared_ptr exec, + const matrix::BatchDense* source, + size_type* result, const size_type* stride_factor, + const size_type* slice_size) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::BatchDense* const orig, + matrix::BatchDense* const trans) +{ + using cu_val_type = cuda_type; + const size_type nbatch = orig->get_num_batch_entries(); + const size_type orig_stride = orig->get_stride().at(); + const size_type trans_stride = trans->get_stride().at(); + const int nrows = orig->get_size().at()[0]; + const int ncols = orig->get_size().at()[1]; + transpose<<>>( + nrows, ncols, orig_stride, as_cuda_type(orig->get_const_values()), + trans_stride, as_cuda_type(trans->get_values()), + [] __device__(cu_val_type x) { return x; }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::BatchDense* orig, + matrix::BatchDense* trans) +{ + using cu_val_type = cuda_type; + const size_type nbatch = orig->get_num_batch_entries(); + const size_type orig_stride = orig->get_stride().at(); + const size_type trans_stride = trans->get_stride().at(); + const int nrows = orig->get_size().at()[0]; + const int ncols = orig->get_size().at()[1]; + transpose<<>>( + nrows, ncols, orig_stride, as_cuda_type(orig->get_const_values()), + trans_stride, as_cuda_type(trans->get_values()), + [] __device__(cu_val_type x) { return conj(x); }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL); + + +template +void copy(std::shared_ptr exec, + const matrix::BatchDense* x, + matrix::BatchDense* result) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto result_ub = get_batch_struct(result); + const auto x_ub = get_batch_struct(x); + copy<<>>(x_ub, result_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COPY_KERNEL); + + +template +void convergence_copy(std::shared_ptr exec, + const matrix::BatchDense* x, + matrix::BatchDense* result, + const uint32& converged) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL); + + +template +void batch_scale(std::shared_ptr exec, + const matrix::BatchDiagonal* const left_scale, + const matrix::BatchDiagonal* const rght_scale, + matrix::BatchDense* const vec_to_scale) +{ + if (!left_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; + if (!rght_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; + if (!vec_to_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; + + const auto stride = vec_to_scale->get_stride().at(); + const auto nrows = static_cast(vec_to_scale->get_size().at()[0]); + const auto nrhs = static_cast(vec_to_scale->get_size().at()[1]); + const auto nbatch = 
vec_to_scale->get_num_batch_entries(); + + const int num_blocks = vec_to_scale->get_num_batch_entries(); + uniform_batch_scale<<>>( + nrows, stride, nrhs, nbatch, + as_cuda_type(left_scale->get_const_values()), + as_cuda_type(rght_scale->get_const_values()), + as_cuda_type(vec_to_scale->get_values())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL); + + +template +void add_scaled_identity(std::shared_ptr exec, + const matrix::BatchDense* const a, + const matrix::BatchDense* const b, + matrix::BatchDense* const mtx) +{ + if (!mtx->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; + const auto num_blocks = mtx->get_num_batch_entries(); + const auto nrows = static_cast(mtx->get_size().at(0)[0]); + const auto ncols = static_cast(mtx->get_size().at(0)[1]); + const auto stride = mtx->get_stride().at(0); + const auto values = mtx->get_values(); + const auto alpha = a->get_const_values(); + const auto a_stride = a->get_stride().at(0); + const auto b_stride = b->get_stride().at(0); + const auto beta = b->get_const_values(); + add_scaled_identity<<>>( + num_blocks, nrows, ncols, stride, as_cuda_type(values), a_stride, + as_cuda_type(alpha), b_stride, as_cuda_type(beta)); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL); + + +} // namespace batch_dense +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index e433322e644..fea0dec5c8c 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -33,6 +33,7 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_select_kernel.hip.cpp factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp + matrix/batch_vector_kernels.hip.cpp matrix/coo_kernels.hip.cpp ${CSR_INSTANTIATE} matrix/dense_kernels.hip.cpp diff --git a/hip/matrix/batch_vector_kernels.hip.cpp b/hip/matrix/batch_vector_kernels.hip.cpp new file mode 100644 index 00000000000..32665e31191 --- /dev/null +++ b/hip/matrix/batch_vector_kernels.hip.cpp @@ -0,0 +1,449 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include "core/matrix/batch_struct.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/hipblas_bindings.hip.hpp" +#include "hip/base/pointer_mode_guard.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" +#include "hip/components/uninitialized_array.hip.hpp" +#include "hip/matrix/batch_struct.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The BatchDense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +constexpr auto default_block_size = 256; +constexpr int sm_multiplier = 4; + + +#include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" +#include "common/cuda_hip/matrix/batch_vector_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const matrix::BatchDense* a, + const matrix::BatchDense* b, + matrix::BatchDense* c) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto a_ub = get_batch_struct(a); + const auto b_ub = get_batch_struct(b); + const auto c_ub = get_batch_struct(c); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + hipLaunchKernelGGL(mv, num_blocks, default_block_size, 0, 0, a_ub, b_ub, + c_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void apply(std::shared_ptr exec, + const matrix::BatchDense* alpha, + const matrix::BatchDense* a, + const matrix::BatchDense* b, + const matrix::BatchDense* beta, + matrix::BatchDense* c) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto a_ub = get_batch_struct(a); + const auto b_ub = get_batch_struct(b); + const auto c_ub = get_batch_struct(c); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + hipLaunchKernelGGL(advanced_mv, num_blocks, default_block_size, 0, 0, + alpha_ub, a_ub, b_ub, beta_ub, c_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL); + + +template +void scale(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + matrix::BatchDense* const x) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + hipLaunchKernelGGL(scale, dim3(num_blocks), dim3(default_block_size), 0, 0, + alpha_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL); + + +template +void add_scaled(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const x, + matrix::BatchDense* const y) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const size_type nrhs = 
x->get_size().at(0)[1]; + if (nrhs == 1) { + const auto num_batch = x->get_num_batch_entries(); + const auto num_rows = x->get_size().at(0)[0]; + hipLaunchKernelGGL( + single_add_scaled, dim3(num_blocks), dim3(default_block_size), 0, 0, + num_batch, num_rows, as_hip_type(alpha->get_const_values()), + as_hip_type(x->get_const_values()), as_hip_type(y->get_values())); + } else { + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + hipLaunchKernelGGL(add_scaled, dim3(num_blocks), + dim3(default_block_size), 0, 0, alpha_ub, x_ub, + y_ub); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL); + + +template +void add_scale(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const x, + const matrix::BatchDense* const beta, + matrix::BatchDense* const y) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const size_type nrhs = x->get_size().at(0)[1]; + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + hipLaunchKernelGGL(add_scale, num_blocks, default_block_size, 0, 0, + alpha_ub, x_ub, beta_ub, y_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL); + + +template +void convergence_add_scaled(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const x, + matrix::BatchDense* const y, + const uint32& converged) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL); + + +template +void add_scaled_diag(std::shared_ptr exec, + const matrix::BatchDense* alpha, + const matrix::Diagonal* x, + matrix::BatchDense* y) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL); + + +template +void compute_dot(std::shared_ptr exec, + const matrix::BatchDense* x, + const matrix::BatchDense* y, + matrix::BatchDense* result) +{ + const auto num_blocks = x->get_num_batch_entries(); + const auto num_rhs = x->get_size().at()[1]; + if (num_rhs == 1) { + const auto num_rows = x->get_size().at()[0]; + hipLaunchKernelGGL(single_compute_dot_product, dim3(num_blocks), + dim3(default_block_size), 0, 0, num_blocks, num_rows, + as_hip_type(x->get_const_values()), + as_hip_type(y->get_const_values()), + as_hip_type(result->get_values())); + } else { + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + const auto res_ub = get_batch_struct(result); + hipLaunchKernelGGL(compute_dot_product, dim3(num_blocks), + dim3(default_block_size), 0, 0, x_ub, y_ub, res_ub); + } +} + + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL); + + +template +void convergence_compute_dot(std::shared_ptr exec, + const matrix::BatchDense* x, + const matrix::BatchDense* y, + matrix::BatchDense* result, + const uint32& converged) GKO_NOT_IMPLEMENTED; + + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL); + + +template +void compute_norm2(std::shared_ptr exec, + const matrix::BatchDense* const x, + matrix::BatchDense>* const result) +{ + const auto num_blocks = x->get_num_batch_entries(); + const auto num_rhs = x->get_size().at()[1]; + if (num_rhs == 1) { + const auto num_rows = x->get_size().at()[0]; + hipLaunchKernelGGL(single_compute_norm2, 
dim3(num_blocks), + dim3(default_block_size), 0, 0, num_blocks, num_rows, + as_hip_type(x->get_const_values()), + as_hip_type(result->get_values())); + } else { + const auto x_ub = get_batch_struct(x); + const auto res_ub = get_batch_struct(result); + hipLaunchKernelGGL(compute_norm2, dim3(num_blocks), + dim3(default_block_size), 0, 0, x_ub, res_ub); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL); + + +template +void convergence_compute_norm2( + std::shared_ptr exec, + const matrix::BatchDense* const x, + matrix::BatchDense>* const result, + const uint32& converged) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL); + + +template +void convert_to_batch_csr(std::shared_ptr exec, + const matrix::BatchDense* source, + matrix::BatchCsr* other) + GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL); + + +template +void count_nonzeros(std::shared_ptr exec, + const matrix::BatchDense* source, + size_type* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL); + + +template +void calculate_max_nnz_per_row(std::shared_ptr exec, + const matrix::BatchDense* source, + size_type* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row(std::shared_ptr exec, + const matrix::BatchDense* source, + array* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void calculate_total_cols(std::shared_ptr exec, + const matrix::BatchDense* source, + size_type* result, const size_type* stride_factor, + const size_type* slice_size) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::BatchDense* const orig, + matrix::BatchDense* const trans) +{ + using hip_val_type = hip_type; + const size_type nbatch = orig->get_num_batch_entries(); + const size_type orig_stride = orig->get_stride().at(); + const size_type trans_stride = trans->get_stride().at(); + const int nrows = orig->get_size().at()[0]; + const int ncols = orig->get_size().at()[1]; + hipLaunchKernelGGL(transpose, dim3(nbatch), dim3(default_block_size), 0, 0, + nrows, ncols, orig_stride, + as_hip_type(orig->get_const_values()), trans_stride, + as_hip_type(trans->get_values()), + [] __device__(hip_val_type x) { return x; }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::BatchDense* orig, + matrix::BatchDense* trans) +{ + using hip_val_type = hip_type; + const size_type nbatch = orig->get_num_batch_entries(); + const size_type orig_stride = orig->get_stride().at(); + const size_type trans_stride = trans->get_stride().at(); + const int nrows = orig->get_size().at()[0]; + const int ncols = orig->get_size().at()[1]; + hipLaunchKernelGGL(transpose, dim3(nbatch), dim3(default_block_size), 0, 0, + nrows, ncols, orig_stride, + as_hip_type(orig->get_const_values()), trans_stride, + as_hip_type(trans->get_values()), + [] __device__(hip_val_type x) { return conj(x); }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL); + + 
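The two kernel wrappers above (transpose and conj_transpose) launch the same device kernel and differ only in the element-wise functor passed as the last argument. A minimal host-side sketch of that pattern follows; the name transpose_entry and its parameters are illustrative only and are not part of Ginkgo's API or of this patch.

#include <complex>

// Applies op to each element while writing the transposed entry:
// out(c, r) = op(in(r, c)) for one batch entry with the given strides.
template <typename ValueType, typename UnaryOp>
void transpose_entry(const ValueType* in, ValueType* out, int nrows,
                     int ncols, int in_stride, int out_stride, UnaryOp op)
{
    for (int r = 0; r < nrows; ++r) {
        for (int c = 0; c < ncols; ++c) {
            out[c * out_stride + r] = op(in[r * in_stride + c]);
        }
    }
}

// transpose:      transpose_entry(in, out, m, n, ld_in, ld_out,
//                                 [](auto x) { return x; });
// conj_transpose: transpose_entry(in, out, m, n, ld_in, ld_out,
//                                 [](auto x) { return std::conj(x); });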
+template +void copy(std::shared_ptr exec, + const matrix::BatchDense* x, + matrix::BatchDense* result) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto result_ub = get_batch_struct(result); + const auto x_ub = get_batch_struct(x); + hipLaunchKernelGGL(copy, dim3(num_blocks), dim3(default_block_size), 0, 0, + x_ub, result_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COPY_KERNEL); + + +template +void convergence_copy(std::shared_ptr exec, + const matrix::BatchDense* x, + matrix::BatchDense* result, + const uint32& converged) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL); + + +template +void batch_scale(std::shared_ptr exec, + const matrix::BatchDiagonal* const left_scale, + const matrix::BatchDiagonal* const rght_scale, + matrix::BatchDense* const vec_to_scale) +{ + if (!left_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; + if (!rght_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; + if (!vec_to_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; + + const auto stride = vec_to_scale->get_stride().at(); + const auto nrows = static_cast(vec_to_scale->get_size().at()[0]); + const auto nrhs = static_cast(vec_to_scale->get_size().at()[1]); + const auto nbatch = vec_to_scale->get_num_batch_entries(); + + const int num_blocks = vec_to_scale->get_num_batch_entries(); + hipLaunchKernelGGL(uniform_batch_scale, dim3(num_blocks), + dim3(default_block_size), 0, 0, nrows, stride, nrhs, + nbatch, as_hip_type(left_scale->get_const_values()), + as_hip_type(rght_scale->get_const_values()), + as_hip_type(vec_to_scale->get_values())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL); + + +template +void add_scaled_identity(std::shared_ptr exec, + const matrix::BatchDense* const a, + const matrix::BatchDense* const b, + matrix::BatchDense* const mtx) +{ + if (!mtx->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; + const auto num_blocks = mtx->get_num_batch_entries(); + const auto nrows = static_cast(mtx->get_size().at(0)[0]); + const auto ncols = static_cast(mtx->get_size().at(0)[1]); + const auto stride = mtx->get_stride().at(0); + const auto values = mtx->get_values(); + const auto alpha = a->get_const_values(); + const auto a_stride = a->get_stride().at(0); + const auto b_stride = b->get_stride().at(0); + const auto beta = b->get_const_values(); + hipLaunchKernelGGL(add_scaled_identity, num_blocks, default_block_size, 0, + 0, num_blocks, nrows, ncols, stride, as_hip_type(values), + a_stride, as_hip_type(alpha), b_stride, + as_hip_type(beta)); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL); + + +} // namespace batch_dense +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/include/ginkgo/core/base/dim.hpp b/include/ginkgo/core/base/dim.hpp index c70c5f054ec..ae13290cdd2 100644 --- a/include/ginkgo/core/base/dim.hpp +++ b/include/ginkgo/core/base/dim.hpp @@ -243,6 +243,144 @@ struct dim<1u, DimensionType> { }; +/** + * A type representing the dimensions of a multidimensional batch object. 
+ * + * @tparam Dimensionality number of dimensions of the object + * @tparam DimensionType datatype used to represent each dimension + * + * @ingroup batch_dim + */ +template +struct batch_dim { + static constexpr size_type dimensionality = Dimensionality; + using dimension_type = DimensionType; + + /** + * Checks if the batch_dim object stores equal sizes. + * + * @return bool representing whether equal sizes are being stored + */ + bool stores_equal_sizes() const { return equal_sizes_; } + + /** + * Get the number of batch entries stored + * + * @return num_batch_entries + */ + size_type get_num_batch_entries() const { return num_batch_entries_; } + + /** + * Get the sizes of all entries as a std::vector. + * + * @return the std::vector of batch sizes + */ + std::vector> get_batch_sizes() const + { + if (equal_sizes_) { + if (num_batch_entries_ > 0) { + return std::vector>( + num_batch_entries_, common_size_); + } else { + return std::vector>{ + common_size_}; + } + } else { + return sizes_; + } + } + + /** + * Get the batch size at a particular index. + * + * @param batch_entry the index of the entry whose size is needed + * + * @return the size of the batch entry at the requested batch-index + */ + const dim& at( + const size_type batch_entry = 0) const + { + if (equal_sizes_) { + return common_size_; + } else { + GKO_ASSERT(batch_entry < num_batch_entries_); + return sizes_[batch_entry]; + } + } + + /** + * Checks if two batch_dim objects are equal. + * + * @param x first object + * @param y second object + * + * @return true if and only if all dimensions of both objects are equal. + */ + friend bool operator==(const batch_dim& x, const batch_dim& y) + { + if (x.equal_sizes_ && y.equal_sizes_) { + return x.num_batch_entries_ == y.num_batch_entries_ && + x.common_size_ == y.common_size_; + } else { + return x.sizes_ == y.sizes_; + } + } + + /** + * Creates a batch_dim object which stores a uniform size for all batch + * entries. + * + * @param num_batch_entries number of batch entries to be stored + * @param common_size the common size of all the batch entries stored + * + * @note Use this constructor when uniform batches need to be stored. + */ + explicit batch_dim(const size_type num_batch_entries = 0, + const dim& common_size = + dim{}) + : equal_sizes_(true), + common_size_(common_size), + num_batch_entries_(num_batch_entries), + sizes_() + {} + + /** + * Creates a batch_dim object which stores possibly non-uniform sizes for + * the different batch entries. + * + * @param batch_sizes the std::vector object that stores the batch_sizes + * + * @note Use this constructor when non-uniform batches need to be stored. + */ + batch_dim( + const std::vector>& batch_sizes) + : equal_sizes_(false), + common_size_(dim{}), + num_batch_entries_(batch_sizes.size()), + sizes_(batch_sizes) + { + check_size_equality(); + } + +private: + void check_size_equality() + { + for (size_type i = 0; i < num_batch_entries_; ++i) { + if (!(sizes_[i] == sizes_[0])) { + return; + } + } + common_size_ = sizes_[0]; + equal_sizes_ = true; + } + + bool equal_sizes_{}; + size_type num_batch_entries_{}; + dim common_size_{}; + std::vector> sizes_{}; +}; + + /** * Checks if two dim objects are different. * @@ -280,6 +418,54 @@ constexpr GKO_ATTRIBUTES GKO_INLINE dim<2, DimensionType> transpose( } +/** + * Checks if two batch dim objects are different. 
+ * + * @tparam Dimensionality number of dimensions of the dim objects + * @tparam DimensionType datatype used to represent each dimension + * + * @param x first object + * @param y second object + * + * @return `!(x == y)` + */ +template +inline bool operator!=(const batch_dim& x, + const batch_dim& y) +{ + return !(x == y); +} + + +/** + * Returns a batch_dim object with its dimensions swapped for batched operators + * + * @tparam DimensionType datatype used to represent each dimension + * + * @param dimensions original object + * + * @return a batch_dim object with the individual batches having their + * dimensions swapped + */ +template +inline batch_dim<2, DimensionType> transpose( + const batch_dim<2, DimensionType>& input) +{ + batch_dim<2, DimensionType> out{}; + if (input.stores_equal_sizes()) { + out = batch_dim<2, DimensionType>(input.get_num_batch_entries(), + gko::transpose(input.at(0))); + return out; + } + auto trans = + std::vector>(input.get_num_batch_entries()); + for (size_type i = 0; i < trans.size(); ++i) { + trans[i] = transpose(input.at(i)); + } + return batch_dim<2, DimensionType>(trans); +} + + } // namespace gko diff --git a/include/ginkgo/core/matrix/batch_vector.hpp b/include/ginkgo/core/matrix/batch_vector.hpp new file mode 100644 index 00000000000..f4061114052 --- /dev/null +++ b/include/ginkgo/core/matrix/batch_vector.hpp @@ -0,0 +1,1093 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace matrix { + + +template +class BatchDiagonal; + + +template +class BatchCsr; + + +/** + * BatchDense is a batch matrix format which explicitly stores all values of the + * matrix in each of the batches. 
+ * + * The values in each of the batches are stored in row-major format (values + * belonging to the same row appear consecutive in the memory). Optionally, rows + * can be padded for better memory access. + * + * @tparam ValueType precision of matrix elements + * + * @note While this format is not very useful for storing sparse matrices, it + * is often suitable to store vectors, and sets of vectors. + * @ingroup batch_dense + * @ingroup mat_formats + * @ingroup BatchLinOp + */ +template +class BatchDense : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo>>, + public ConvertibleTo>, + public ConvertibleTo>, + public BatchReadableFromMatrixData, + public BatchReadableFromMatrixData, + public BatchWritableToMatrixData, + public BatchWritableToMatrixData, + public BatchTransposable, + public BatchScaledIdentityAddable { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class BatchDense>; + +public: + using EnableBatchLinOp::convert_to; + using EnableBatchLinOp::move_to; + using BatchReadableFromMatrixData::read; + using BatchReadableFromMatrixData::read; + + using value_type = ValueType; + using index_type = int32; + using transposed_type = BatchDense; + using unbatch_type = Dense; + using mat_data = gko::matrix_data; + using mat_data32 = gko::matrix_data; + using absolute_type = remove_complex; + using complex_type = to_complex; + + using row_major_range = gko::range>; + + /** + * Creates a BatchDense matrix with the configuration of another BatchDense + * matrix. + * + * @param other The other matrix whose configuration needs to copied. + */ + static std::unique_ptr create_with_config_of( + const BatchDense* other) + { + // De-referencing `other` before calling the functions (instead of + // using operator `->`) is currently required to be compatible with + // CUDA 10.1. + // Otherwise, it results in a compile error. + return (*other).create_with_same_config(); + } + + friend class BatchDense>; + + void convert_to( + BatchDense>* result) const override; + + void move_to(BatchDense>* result) override; + + void convert_to(BatchCsr* result) const override; + + void move_to(BatchCsr* result) override; + + void convert_to(BatchDiagonal* result) const override; + + void move_to(BatchDiagonal* result) override; + + void read(const std::vector& data) override; + + void read(const std::vector& data) override; + + void write(std::vector& data) const override; + + void write(std::vector& data) const override; + + std::unique_ptr transpose() const override; + + std::unique_ptr conj_transpose() const override; + + /** + * Unbatches the batched dense and creates a std::vector of Dense matrices + * + * @return a std::vector containing the Dense matrices. + */ + std::vector> unbatch() const + { + auto exec = this->get_executor(); + auto unbatch_mats = std::vector>{}; + for (size_type b = 0; b < this->get_num_batch_entries(); ++b) { + auto mat = unbatch_type::create(exec, this->get_size().at(b), + this->get_stride().at(b)); + exec->copy_from(exec.get(), mat->get_num_stored_elements(), + this->get_const_values() + + num_elems_per_batch_cumul_.get_const_data()[b], + mat->get_values()); + unbatch_mats.emplace_back(std::move(mat)); + } + return unbatch_mats; + } + + /** + * Returns a pointer to the array of values of the matrix. + * + * @return the pointer to the array of values + */ + value_type* get_values() noexcept { return values_.get_data(); } + + /** + * Returns a pointer to the array of values of the matrix. 
+ * + * @return the pointer to the array of values + */ + value_type* get_values(size_type batch) noexcept + { + GKO_ASSERT(batch < this->get_num_batch_entries()); + return values_.get_data() + + num_elems_per_batch_cumul_.get_const_data()[batch]; + } + + /** + * @copydoc get_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * @copydoc get_values(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values(size_type batch) const noexcept + { + GKO_ASSERT(batch < this->get_num_batch_entries()); + return values_.get_const_data() + + num_elems_per_batch_cumul_.get_const_data()[batch]; + } + + /** + * Returns the batch_stride of the matrix. + * + * @return the batch_stride of the matrix. + */ + const batch_stride& get_stride() const noexcept { return stride_; } + + /** + * Returns the number of elements explicitly stored in the batch matrix, + * cumulative across all the batches. + * + * @return the number of elements explicitly stored in the matrix, + * cumulative across all the batches + */ + size_type get_num_stored_elements() const noexcept + { + return values_.get_num_elems(); + } + + /** + * Returns the number of elements explicitly stored at a specific batch + * index. + * + * @param batch the batch index to be queried + * + * @return the number of elements explicitly stored in the matrix + */ + size_type get_num_stored_elements(size_type batch) const noexcept + { + GKO_ASSERT(batch < this->get_num_batch_entries()); + return num_elems_per_batch_cumul_.get_const_data()[batch + 1] - + num_elems_per_batch_cumul_.get_const_data()[batch]; + } + + /** + * Returns a single element for a particular batch. + * + * @param batch the batch index to be queried + * @param row the row of the requested element + * @param col the column of the requested element + * + * @note the method has to be called on the same Executor the matrix is + * stored at (e.g. trying to call this method on a GPU matrix from + * the OMP results in a runtime error) + */ + value_type& at(size_type batch, size_type row, size_type col) noexcept + { + GKO_ASSERT(batch < this->get_num_batch_entries()); + return values_.get_data()[linearize_index(batch, row, col)]; + } + + /** + * @copydoc BatchDense::at(size_type, size_type, size_type) + */ + value_type at(size_type batch, size_type row, size_type col) const noexcept + { + GKO_ASSERT(batch < this->get_num_batch_entries()); + return values_.get_const_data()[linearize_index(batch, row, col)]; + } + + /** + * Returns a single element for a particular batch entry. + * + * Useful for iterating across all elements of the matrix. + * However, it is less efficient than the two-parameter variant of this + * method. + * + * @param batch the batch index to be queried + * @param idx a linear index of the requested element + * (ignoring the stride) + * + * @note the method has to be called on the same Executor the matrix is + * stored at (e.g. 
trying to call this method on a GPU matrix from + * the OMP results in a runtime error) + */ + ValueType& at(size_type batch, size_type idx) noexcept + { + return values_.get_data()[linearize_index(batch, idx)]; + } + + /** + * @copydoc BatchDense::at(size_type, size_type, size_type) + */ + ValueType at(size_type batch, size_type idx) const noexcept + { + return values_.get_const_data()[linearize_index(batch, idx)]; + } + + /** + * Scales the matrix with a scalar (aka: BLAS scal). + * + * @param alpha If alpha is 1x1 BatchDense matrix, the entire matrix (all + * batches) is scaled by alpha. If it is a BatchDense row vector of values, + * then i-th column of the matrix is scaled with the i-th element of alpha + * (the number of columns of alpha has to match the number of columns of the + * matrix). + */ + void scale(const BatchLinOp* alpha) + { + auto exec = this->get_executor(); + this->scale_impl(make_temporary_clone(exec, alpha).get()); + } + + /** + * Adds `b` scaled by `alpha` to the matrix (aka: BLAS axpy). + * + * @param alpha If alpha is 1x1 BatchDense matrix, the entire matrix is + * scaled by alpha. If it is a BatchDense row vector of values, then i-th + * column of the matrix is scaled with the i-th element of alpha (the number + * of columns of alpha has to match the number of columns of the matrix). + * @param b a matrix of the same dimension as this + */ + void add_scaled(const BatchLinOp* alpha, const BatchLinOp* b) + { + auto exec = this->get_executor(); + this->add_scaled_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get()); + } + + /** + * Adds `a` scaled by `alpha` to the matrix scaled by `beta`: + * this <- alpha * a + beta * this. + * + * @param alpha If alpha is 1x1 BatchDense matrix, the entire matrix a is + * scaled by alpha. If it is a BatchDense row vector of + * values, then i-th column of a is scaled with the i-th + * element of alpha (the number of columns of alpha has to + * match the number of columns of a). + * @param a a matrix of the same dimension as this. + * @param beta Scalar(s), of the same size as alpha, to multiply this + * matrix. + */ + void add_scale(const BatchLinOp* alpha, const BatchLinOp* a, + const BatchLinOp* beta); + + /** + * Computes the column-wise dot product of each matrix in this batch and its + * corresponding entry in `b`. If the matrix has complex value_type, then + * the conjugate of this is taken. + * + * @param b a BatchDense matrix of same dimension as this + * @param result a BatchDense row vector, used to store the dot product + * (the number of column in the vector must match the number + * of columns of this) + */ + void compute_dot(const BatchLinOp* b, BatchLinOp* result) const + { + auto exec = this->get_executor(); + this->compute_dot_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, result).get()); + } + + /** + * Computes the Euclidean (L^2) norm of each matrix in this batch. + * + * @param result a BatchDense row vector, used to store the norm + * (the number of columns in the vector must match the number + * of columns of this) + */ + void compute_norm2(BatchLinOp* result) const + { + auto exec = this->get_executor(); + this->compute_norm2_impl(make_temporary_clone(exec, result).get()); + } + + /** + * Creates a constant (immutable) batch dense matrix from a constant array. 
+ * + * @param exec the executor to create the matrix on + * @param size the dimensions of the matrix + * @param values the value array of the matrix + * @param stride the row-stride of the matrix + * @returns A smart pointer to the constant matrix wrapping the input array + * (if it resides on the same executor as the matrix) or a copy of + * the array on the correct executor. + */ + static std::unique_ptr create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + gko::detail::const_array_view&& values, + const batch_stride& strides) + { + // cast const-ness away, but return a const object afterwards, + // so we can ensure that no modifications take place. + return std::unique_ptr(new BatchDense{ + exec, sizes, gko::detail::array_const_cast(std::move(values)), + strides}); + } + +private: + /** + * Compute the memory required for the values array from the sizes and the + * strides. + */ + inline size_type compute_batch_mem(const batch_dim<2>& sizes, + const batch_stride& strides) + { + GKO_ASSERT(sizes.get_num_batch_entries() == + strides.get_num_batch_entries()); + if (sizes.stores_equal_sizes() && strides.stores_equal_strides()) { + return (sizes.at(0))[0] * strides.at(0) * + sizes.get_num_batch_entries(); + } + size_type mem_req = 0; + for (auto i = 0; i < sizes.get_num_batch_entries(); ++i) { + mem_req += (sizes.at(i))[0] * strides.at(i); + } + return mem_req; + } + + /** + * Extract the nth dim of the batch sizes from the input batch_dim object. + */ + inline batch_stride extract_nth_dim(const int dim, const batch_dim<2>& size) + { + if (size.stores_equal_sizes()) { + return batch_stride(size.get_num_batch_entries(), size.at(0)[dim]); + } + std::vector stride(size.get_num_batch_entries()); + for (auto i = 0; i < size.get_num_batch_entries(); ++i) { + stride[i] = (size.at(i))[dim]; + } + return batch_stride(stride); + } + + /** + * Extract strides from the vector of the distinct Dense matrices. + */ + inline batch_stride get_strides_from_mtxs( + const std::vector*> mtxs) + { + auto strides = std::vector(mtxs.size()); + for (auto i = 0; i < mtxs.size(); ++i) { + strides[i] = mtxs[i]->get_stride(); + } + return batch_stride(strides); + } + + /** + * Extract sizes from the vector of the distinct Dense matrices. + */ + inline batch_dim<2> get_sizes_from_mtxs( + const std::vector*> mtxs) + { + auto sizes = std::vector>(mtxs.size()); + for (auto i = 0; i < mtxs.size(); ++i) { + sizes[i] = mtxs[i]->get_size(); + } + return batch_dim<2>(sizes); + } + + /** + * Compute the number of elements stored in each batch and store it in a + * prefixed sum fashion + */ + inline array compute_num_elems_per_batch_cumul( + std::shared_ptr exec, const batch_dim<2>& sizes, + const batch_stride& strides) + { + auto num_elems = array(exec->get_master(), + sizes.get_num_batch_entries() + 1); + num_elems.get_data()[0] = 0; + for (auto i = 0; i < sizes.get_num_batch_entries(); ++i) { + num_elems.get_data()[i + 1] = + num_elems.get_data()[i] + (sizes.at(i))[0] * strides.at(i); + } + num_elems.set_executor(exec); + return num_elems; + } + +protected: + /** + * Creates an uninitialized BatchDense matrix of the specified size. + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + */ + BatchDense(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}) + : BatchDense(std::move(exec), size, + size.get_num_batch_entries() > 0 ? extract_nth_dim(1, size) + : batch_stride{}) + {} + + /** + * Creates an uninitialized BatchDense matrix of the specified size. 
+ * + * @param exec Executor associated to the matrix + * @param size size of the batch matrices in a batch_dim object + * @param stride stride of the rows (i.e. offset between the first + * elements of two consecutive rows, expressed as the + * number of matrix elements) + */ + BatchDense(std::shared_ptr exec, const batch_dim<2>& size, + const batch_stride& stride) + : EnableBatchLinOp(exec, size), + values_(exec, compute_batch_mem(size, stride)), + stride_(stride) + { + num_elems_per_batch_cumul_ = + compute_num_elems_per_batch_cumul(exec, this->get_size(), stride); + } + + /** + * Creates a BatchDense matrix from an already allocated (and initialized) + * array. + * + * @tparam ValuesArray type of array of values + * + * @param exec Executor associated to the matrix + * @param size sizes of the batch matrices in a batch_dim object + * @param values array of matrix values + * @param strides stride of the rows (i.e. offset between the first + * elements of two consecutive rows, expressed as the + * number of matrix elements) + * + * @note If `values` is not an rvalue, not an array of ValueType, or is on + * the wrong executor, an internal copy will be created, and the + * original array data will not be used in the matrix. + */ + template + BatchDense(std::shared_ptr exec, const batch_dim<2>& size, + ValuesArray&& values, const batch_stride& stride) + : EnableBatchLinOp(exec, size), + values_{exec, std::forward(values)}, + stride_{stride}, + num_elems_per_batch_cumul_( + exec->get_master(), + compute_num_elems_per_batch_cumul(exec->get_master(), + this->get_size(), stride)) + { + auto num_elems = + num_elems_per_batch_cumul_ + .get_const_data()[num_elems_per_batch_cumul_.get_num_elems() - + 1] - + 1; + GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems()); + } + + /** + * Creates a BatchDense matrix from a vector of matrices + * + * @param exec Executor associated to the matrix + * @param matrices The matrices that need to be batched. + */ + BatchDense(std::shared_ptr exec, + const std::vector*>& matrices) + : EnableBatchLinOp(exec, get_sizes_from_mtxs(matrices)), + stride_{get_strides_from_mtxs(matrices)}, + values_(exec, compute_batch_mem(this->get_size(), stride_)) + { + num_elems_per_batch_cumul_ = compute_num_elems_per_batch_cumul( + exec->get_master(), this->get_size(), stride_); + for (size_type i = 0; i < this->get_num_batch_entries(); ++i) { + auto local_exec = matrices[i]->get_executor(); + exec->copy_from(local_exec.get(), + matrices[i]->get_num_stored_elements(), + matrices[i]->get_const_values(), + this->get_values() + + num_elems_per_batch_cumul_.get_const_data()[i]); + } + } + + /** + * Creates a BatchDense matrix by duplicating BatchDense matrix + * + * @param exec Executor associated to the matrix + * @param num_duplications The number of times to duplicate + * @param input The matrix to be duplicated. 
+ */ + BatchDense(std::shared_ptr exec, size_type num_duplications, + const BatchDense* input) + : EnableBatchLinOp( + exec, gko::batch_dim<2>( + input->get_num_batch_entries() * num_duplications, + input->get_size().at(0))), + stride_{gko::batch_stride( + input->get_num_batch_entries() * num_duplications, + input->get_stride().at(0))}, + values_(exec, compute_batch_mem(this->get_size(), stride_)) + { + // Check if it works when stride neq num_cols + num_elems_per_batch_cumul_ = compute_num_elems_per_batch_cumul( + exec->get_master(), this->get_size(), stride_); + size_type offset = 0; + for (size_type i = 0; i < num_duplications; ++i) { + exec->copy_from( + input->get_executor().get(), input->get_num_stored_elements(), + input->get_const_values(), this->get_values() + offset); + offset += input->get_num_stored_elements(); + } + } + + /** + * Creates a BatchDense matrix by duplicating Dense matrix + * + * @param exec Executor associated to the matrix + * @param num_duplications The number of times to duplicate + * @param input The matrix to be duplicated. + */ + BatchDense(std::shared_ptr exec, size_type num_duplications, + const Dense* input) + : EnableBatchLinOp( + exec, gko::batch_dim<2>(num_duplications, input->get_size())), + stride_{gko::batch_stride(num_duplications, input->get_stride())}, + values_(exec, compute_batch_mem(this->get_size(), stride_)) + { + // Check if it works when stride neq num_cols + num_elems_per_batch_cumul_ = compute_num_elems_per_batch_cumul( + exec->get_master(), this->get_size(), stride_); + size_type offset = 0; + for (size_type i = 0; i < num_duplications; ++i) { + exec->copy_from( + input->get_executor().get(), input->get_num_stored_elements(), + input->get_const_values(), this->get_values() + offset); + offset += input->get_num_stored_elements(); + } + } + + /** + * Creates a BatchDense matrix with the same configuration as the callers + * matrix. + * + * @returns a BatchDense matrix with the same configuration as the caller. + */ + virtual std::unique_ptr create_with_same_config() const + { + return BatchDense::create(this->get_executor(), this->get_size(), + this->get_stride()); + } + + /** + * @copydoc scale(const BatchLinOp *) + * + * @note Other implementations of batch_dense should override this function + * instead of scale(const BatchLinOp *alpha). + */ + virtual void scale_impl(const BatchLinOp* alpha); + + /** + * @copydoc add_scaled(const BatchLinOp *, const BatchLinOp *) + * + * @note Other implementations of batch_dense should override this function + * instead of add_scale(const BatchLinOp *alpha, const BatchLinOp + * *b). + */ + virtual void add_scaled_impl(const BatchLinOp* alpha, const BatchLinOp* b); + + /** + * @copydoc compute_dot(const BatchLinOp *, BatchLinOp *) const + * + * @note Other implementations of batch_dense should override this function + * instead of compute_dot(const BatchLinOp *b, BatchLinOp *result). + */ + virtual void compute_dot_impl(const BatchLinOp* b, + BatchLinOp* result) const; + + /** + * @copydoc compute_norm2(BatchLinOp *) const + * + * @note Other implementations of batch_dense should override this function + * instead of compute_norm2(BatchLinOp *result). 
+ */ + virtual void compute_norm2_impl(BatchLinOp* result) const; + + void apply_impl(const BatchLinOp* b, BatchLinOp* x) const override; + + void apply_impl(const BatchLinOp* alpha, const BatchLinOp* b, + const BatchLinOp* beta, BatchLinOp* x) const override; + + size_type linearize_index(size_type batch, size_type row, + size_type col) const noexcept + { + return num_elems_per_batch_cumul_.get_const_data()[batch] + + row * stride_.at(batch) + col; + } + + size_type linearize_index(size_type batch, size_type idx) const noexcept + { + return linearize_index(batch, idx / this->get_size().at(batch)[1], + idx % this->get_size().at(batch)[1]); + } + +private: + batch_stride stride_; + array num_elems_per_batch_cumul_; + array values_; + + void add_scaled_identity_impl(const BatchLinOp* a, + const BatchLinOp* b) override; +}; + + +} // namespace matrix + + +/** + * Creates and initializes a batch of column-vectors. + * + * This function first creates a temporary Dense matrix, fills it with passed in + * values, and then converts the matrix to the requested type. + * + * @tparam Matrix matrix type to initialize + * (Dense has to implement the ConvertibleTo interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param stride row stride for the temporary Dense matrix + * @param vals values used to initialize the batch vector + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup BatchLinOp + * @ingroup mat_formats + */ +template +std::unique_ptr batch_initialize( + std::vector stride, + std::initializer_list> + vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + using batch_dense = matrix::BatchDense; + size_type num_batch_entries = vals.size(); + std::vector num_rows(num_batch_entries); + std::vector> sizes(num_batch_entries); + auto vals_begin = begin(vals); + for (size_type b = 0; b < num_batch_entries; ++b) { + num_rows[b] = vals_begin->size(); + sizes[b] = dim<2>(num_rows[b], 1); + vals_begin++; + } + auto b_size = batch_dim<2>(sizes); + auto b_stride = batch_stride(stride); + auto tmp = batch_dense::create(exec->get_master(), b_size, b_stride); + size_type batch = 0; + for (const auto& b : vals) { + size_type idx = 0; + for (const auto& elem : b) { + tmp->at(batch, idx) = elem; + ++idx; + } + ++batch; + } + auto mtx = Matrix::create(exec, std::forward(create_args)...); + tmp->move_to(mtx.get()); + return mtx; +} + +/** + * Creates and initializes a batch of column-vectors. + * + * This function first creates a temporary Dense matrix, fills it with passed in + * values, and then converts the matrix to the requested type. The stride of + * the intermediate Dense matrix is set to 1. + * + * @tparam Matrix matrix type to initialize + * (Dense has to implement the ConvertibleTo interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param vals values used to initialize the vector + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup BatchLinOp + * @ingroup mat_formats + */ +template +std::unique_ptr batch_initialize( + std::initializer_list> + vals, + std::shared_ptr exec, TArgs&&... 
create_args) +{ + return batch_initialize(std::vector(vals.size(), 1), + vals, std::move(exec), + std::forward(create_args)...); +} + + +/** + * Creates and initializes a batch of matrices. + * + * This function first creates a temporary Dense matrix, fills it with passed in + * values, and then converts the matrix to the requested type. + * + * @tparam Matrix matrix type to initialize + * (Dense has to implement the ConvertibleTo interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param stride row stride for the temporary Dense matrix + * @param vals values used to initialize the matrix + * @param exec Executor associated to the matrix + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup BatchLinOp + * @ingroup mat_formats + */ +template +std::unique_ptr batch_initialize( + std::vector stride, + std::initializer_list>> + vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + using batch_dense = matrix::BatchDense; + size_type num_batch_entries = vals.size(); + std::vector num_rows(num_batch_entries); + std::vector num_cols(num_batch_entries); + std::vector> sizes(num_batch_entries); + size_type ind = 0; + for (const auto& b : vals) { + num_rows[ind] = b.size(); + num_cols[ind] = num_rows[ind] > 0 ? begin(b)->size() : 1; + sizes[ind] = dim<2>(num_rows[ind], num_cols[ind]); + ++ind; + } + auto b_size = batch_dim<2>(sizes); + auto b_stride = batch_stride(stride); + auto tmp = batch_dense::create(exec->get_master(), b_size, b_stride); + size_type batch = 0; + for (const auto& b : vals) { + size_type ridx = 0; + for (const auto& row : b) { + size_type cidx = 0; + for (const auto& elem : row) { + tmp->at(batch, ridx, cidx) = elem; + ++cidx; + } + ++ridx; + } + ++batch; + } + auto mtx = Matrix::create(exec, std::forward(create_args)...); + tmp->move_to(mtx.get()); + return mtx; +} + + +/** + * Creates and initializes a batch of matrices. + * + * This function first creates a temporary Dense matrix, fills it with passed in + * values, and then converts the matrix to the requested type. The stride of + * the intermediate Dense matrix is set to the number of columns of the + * initializer list. + * + * @tparam Matrix matrix type to initialize + * (Dense has to implement the ConvertibleTo interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param vals values used to initialize the matrix + * @param exec Executor associated to the matrix + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup BatchLinOp + * @ingroup mat_formats + */ +template +std::unique_ptr batch_initialize( + std::initializer_list>> + vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + auto strides = std::vector(vals.size(), 0); + size_type ind = 0; + for (const auto& b : vals) { + strides[ind] = begin(b)->size(); + ++ind; + } + return batch_initialize(strides, vals, std::move(exec), + std::forward(create_args)...); +} + + +/** + * Creates and initializes a batch column-vector by making copies of the single + * input column vector. + * + * This function first creates a temporary batch dense matrix, fills it with + * passed in values, and then converts the matrix to the requested type. 
+ * + * @tparam Matrix matrix type to initialize + * (Dense has to implement the ConvertibleTo + * interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param stride row strides for the temporary batch dense matrix + * @param num_vectors The number of times the input vector is copied into + * the final output + * @param vals values used to initialize each vector in the temp. batch + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup BatchLinOp + * @ingroup mat_formats + */ +template +std::unique_ptr batch_initialize( + std::vector stride, const size_type num_vectors, + std::initializer_list vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + using batch_dense = matrix::BatchDense; + std::vector num_rows(num_vectors); + std::vector> sizes(num_vectors); + for (size_type b = 0; b < num_vectors; ++b) { + num_rows[b] = vals.size(); + sizes[b] = dim<2>(vals.size(), 1); + } + auto b_size = batch_dim<2>(sizes); + auto b_stride = batch_stride(stride); + auto tmp = batch_dense::create(exec->get_master(), b_size, b_stride); + for (size_type batch = 0; batch < num_vectors; batch++) { + size_type idx = 0; + for (const auto& elem : vals) { + tmp->at(batch, idx) = elem; + ++idx; + } + } + auto mtx = Matrix::create(exec, std::forward(create_args)...); + tmp->move_to(mtx.get()); + return mtx; +} + + +/** + * Creates and initializes a column-vector from copies of a given vector. + * + * This function first creates a temporary Dense matrix, fills it with passed + * in values, and then converts the matrix to the requested type. The stride of + * the intermediate Dense matrix is set to 1. + * + * @tparam Matrix matrix type to initialize + * (Dense has to implement the ConvertibleTo + * interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param num_vectors The number of times the input vector is copied into + * the final output + * @param vals values used to initialize the vector + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup BatchLinOp + * @ingroup mat_formats + */ +template +std::unique_ptr batch_initialize( + const size_type num_vectors, + std::initializer_list vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + return batch_initialize(std::vector(num_vectors, 1), + num_vectors, vals, std::move(exec), + std::forward(create_args)...); +} + +/** + * Creates and initializes a matrix from copies of a given matrix. + * + * This function first creates a temporary batch dense matrix, fills it with + * passed in values, and then converts the matrix to the requested type. + * + * @tparam Matrix matrix type to initialize + * (Dense has to implement the ConvertibleTo interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param stride row strides for the temporary batch dense matrix + * @param num_matrices The number of times the input matrix is copied into + * the final output + * @param vals values used to initialize each vector in the temp. 
batch + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup LinOp + * @ingroup mat_formats + */ +template +std::unique_ptr batch_initialize( + std::vector stride, const size_type num_matrices, + std::initializer_list> + vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + using batch_dense = matrix::BatchDense; + std::vector> sizes(num_matrices); + const size_type num_rows = vals.size(); + for (size_type b = 0; b < num_matrices; ++b) { + const size_type num_cols = begin(vals)->size(); + sizes[b] = dim<2>(num_rows, num_cols); + for (auto blockit = begin(vals); blockit != end(vals); ++blockit) { + GKO_ASSERT(blockit->size() == num_cols); + } + } + auto tmp = batch_dense::create(exec->get_master(), sizes, stride); + for (size_type batch = 0; batch < num_matrices; batch++) { + size_type ridx = 0; + for (const auto& row : vals) { + size_type cidx = 0; + for (const auto& elem : row) { + tmp->at(batch, ridx, cidx) = elem; + ++cidx; + } + ++ridx; + } + } + auto mtx = Matrix::create(exec, std::forward(create_args)...); + tmp->move_to(mtx.get()); + return mtx; +} + +/** + * Creates and initializes a matrix from copies of a given matrix. + * + * This function first creates a temporary Dense matrix, fills it with passed in + * values, and then converts the matrix to the requested type. The stride of + * the intermediate Dense matrix is set to 1. + * + * @tparam Matrix matrix type to initialize + * (Dense has to implement the ConvertibleTo interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param num_vectors The number of times the input vector is copied into + * the final output + * @param vals values used to initialize the vector + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup LinOp + * @ingroup mat_formats + */ +template +std::unique_ptr batch_initialize( + const size_type num_matrices, + std::initializer_list> + vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + auto strides = std::vector(num_matrices, begin(vals)->size()); + return batch_initialize(strides, num_matrices, vals, + std::move(exec), + std::forward(create_args)...); +} + + +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index bda26ad63d3..abb50ffc09f 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -20,6 +20,7 @@ target_sources(ginkgo_omp factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp + matrix/batch_vector_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/omp/matrix/batch_vector_kernels.cpp b/omp/matrix/batch_vector_kernels.cpp new file mode 100644 index 00000000000..70c0794f4a8 --- /dev/null +++ b/omp/matrix/batch_vector_kernels.cpp @@ -0,0 +1,614 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/components/prefix_sum_kernels.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +/** + * @brief The BatchDense matrix format namespace. + * @ref BatchDense + * @ingroup batch_dense + */ +namespace batch_dense { + + +#include "reference/matrix/batch_dense_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const matrix::BatchDense* const a, + const matrix::BatchDense* const b, + matrix::BatchDense* const c) +{ + const auto a_ub = host::get_batch_struct(a); + const auto b_ub = host::get_batch_struct(b); + const auto c_ub = host::get_batch_struct(c); +#pragma omp parallel for + for (size_type batch = 0; batch < c->get_num_batch_entries(); ++batch) { + const auto a_b = gko::batch::batch_entry(a_ub, batch); + const auto b_b = gko::batch::batch_entry(b_ub, batch); + const auto c_b = gko::batch::batch_entry(c_ub, batch); + matvec_kernel(a_b, b_b, c_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void apply(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const a, + const matrix::BatchDense* const b, + const matrix::BatchDense* const beta, + matrix::BatchDense* const c) +{ + const auto a_ub = host::get_batch_struct(a); + const auto b_ub = host::get_batch_struct(b); + const auto c_ub = host::get_batch_struct(c); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); +#pragma omp parallel for + for (size_type batch = 0; batch < c->get_num_batch_entries(); ++batch) { + const auto a_b = gko::batch::batch_entry(a_ub, batch); + const auto b_b = gko::batch::batch_entry(b_ub, batch); + const auto c_b = gko::batch::batch_entry(c_ub, batch); + const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); + const auto beta_b = gko::batch::batch_entry(beta_ub, batch); + advanced_matvec_kernel(alpha_b.values[0], a_b, b_b, beta_b.values[0], + c_b); + } +} + 
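// Editorial sketch (not part of this patch): the advanced apply kernel above
// computes c = alpha * a * b + beta * c independently for every batch entry.
// The self-contained snippet below illustrates that per-entry arithmetic on
// plain std::vector storage; the `entry` struct and the function name
// `batched_advanced_apply` are hypothetical and are not Ginkgo API.
#include <cassert>
#include <cstddef>
#include <vector>

struct entry {
    std::size_t rows;            // number of rows of this batch entry
    std::size_t cols;            // number of columns (stride taken equal to cols)
    std::vector<double> values;  // row-major values, rows * cols elements
};

// c[b] = alpha[b] * a[b] * x[b] + beta[b] * c[b] for every batch entry b
inline void batched_advanced_apply(const std::vector<double>& alpha,
                                   const std::vector<entry>& a,
                                   const std::vector<entry>& x,
                                   const std::vector<double>& beta,
                                   std::vector<entry>& c)
{
    assert(a.size() == x.size() && a.size() == c.size());
    for (std::size_t b = 0; b < a.size(); ++b) {
        for (std::size_t row = 0; row < c[b].rows; ++row) {
            for (std::size_t col = 0; col < c[b].cols; ++col) {
                double acc = 0.0;
                // accumulate the (row, col) entry of a[b] * x[b]
                for (std::size_t inner = 0; inner < a[b].cols; ++inner) {
                    acc += a[b].values[row * a[b].cols + inner] *
                           x[b].values[inner * x[b].cols + col];
                }
                auto& out = c[b].values[row * c[b].cols + col];
                out = alpha[b] * acc + beta[b] * out;
            }
        }
    }
}
// Unlike this sketch, the kernel above branches on beta == 0 before scaling c,
// and the OMP backend parallelizes the outer loop over batch entries.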
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL); + + +template +void scale(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + matrix::BatchDense* const x) +{ + const auto x_ub = host::get_batch_struct(x); + const auto alpha_ub = host::get_batch_struct(alpha); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { + const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + scale(alpha_b, x_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL); + + +template +void add_scaled(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const x, + matrix::BatchDense* const y) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto alpha_ub = host::get_batch_struct(alpha); +#pragma omp parallel for + for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { + const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + add_scaled(alpha_b, x_b, y_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL); + + +template +void add_scale(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const x, + const matrix::BatchDense* const beta, + matrix::BatchDense* const y) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); +#pragma omp parallel for + for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { + const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); + const auto beta_b = gko::batch::batch_entry(beta_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + add_scale(alpha_b, x_b, beta_b, y_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL); + + +template +void convergence_add_scaled(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const x, + matrix::BatchDense* const y, + const uint32& converged) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto alpha_ub = host::get_batch_struct(alpha); +#pragma omp parallel for + for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { + const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + add_scaled(alpha_b, x_b, y_b, converged); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL); + + +template +void add_scaled_diag(std::shared_ptr, + const matrix::BatchDense*, + const matrix::Diagonal*, + matrix::BatchDense*) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL); + + +template +void compute_dot(std::shared_ptr exec, + const matrix::BatchDense* const x, + const matrix::BatchDense* const y, + matrix::BatchDense* const result) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto res_ub = 
host::get_batch_struct(result); +#pragma omp parallel for + for (size_type batch = 0; batch < result->get_num_batch_entries(); + ++batch) { + const auto res_b = gko::batch::batch_entry(res_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + compute_dot_product(x_b, y_b, res_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL); + + +template +void convergence_compute_dot(std::shared_ptr exec, + const matrix::BatchDense* const x, + const matrix::BatchDense* const y, + matrix::BatchDense* const result, + const uint32& converged) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto res_ub = host::get_batch_struct(result); +#pragma omp parallel for + for (size_type batch = 0; batch < result->get_num_batch_entries(); + ++batch) { + const auto res_b = gko::batch::batch_entry(res_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + compute_dot_product(x_b, y_b, res_b, converged); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL); + + +template +void compute_norm2(std::shared_ptr exec, + const matrix::BatchDense* const x, + matrix::BatchDense>* const result) +{ + const auto x_ub = host::get_batch_struct(x); + const auto res_ub = host::get_batch_struct(result); +#pragma omp parallel for + for (size_type batch = 0; batch < result->get_num_batch_entries(); + ++batch) { + const auto res_b = gko::batch::batch_entry(res_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + compute_norm2(x_b, res_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL); + + +template +void convergence_compute_norm2( + std::shared_ptr exec, + const matrix::BatchDense* const x, + matrix::BatchDense>* const result, + const uint32& converged) +{ + const auto x_ub = host::get_batch_struct(x); + const auto res_ub = host::get_batch_struct(result); +#pragma omp parallel for + for (size_type batch = 0; batch < result->get_num_batch_entries(); + ++batch) { + const auto res_b = gko::batch::batch_entry(res_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + compute_norm2(x_b, res_b, converged); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL); + + +template +void convert_to_batch_csr(std::shared_ptr exec, + const matrix::BatchDense* const source, + matrix::BatchCsr* const result) +{ + GKO_ASSERT(source->get_size().stores_equal_sizes() == true); + auto num_rows = result->get_size().at(0)[0]; + auto num_cols = result->get_size().at(0)[1]; + auto num_batches = result->get_num_batch_entries(); + + auto row_ptrs = result->get_row_ptrs(); + auto col_idxs = result->get_col_idxs(); + auto values = result->get_values(); + + +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + IndexType row_nnz{}; + for (size_type col = 0; col < num_cols; ++col) { + auto val = source->at(0, row, col); + row_nnz += static_cast(val != zero()); + } + row_ptrs[row] = row_nnz; + } + + components::prefix_sum(exec, row_ptrs, num_rows + 1); + +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + auto cur_ptr = row_ptrs[row]; + for (size_type col = 0; col < num_cols; ++col) { + auto val = source->at(0, row, col); + if (val != zero()) { + col_idxs[cur_ptr] = static_cast(col); + ++cur_ptr; 
+ } + } + } + +#pragma omp parallel for + for (size_type batch = 0; batch < num_batches; ++batch) { + size_type cur_ptr = + batch * row_ptrs[num_rows]; // as row_ptrs[num_rows] is the num of + // non zero elements in the matrix + for (size_type row = 0; row < num_rows; ++row) { + for (size_type col = 0; col < num_cols; ++col) { + auto val = source->at(batch, row, col); + if (val != zero()) { + values[cur_ptr] = val; + ++cur_ptr; + } + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_AND_INT32_INDEX( + GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL); + + +template +void count_nonzeros(std::shared_ptr exec, + const matrix::BatchDense* const source, + size_type* const result) +{ +#pragma omp parallel for + for (size_type batch = 0; batch < source->get_num_batch_entries(); + ++batch) { + auto num_rows = source->get_size().at(batch)[0]; + auto num_cols = source->get_size().at(batch)[1]; + size_type num_nonzeros = 0; + + for (size_type row = 0; row < num_rows; ++row) { + for (size_type col = 0; col < num_cols; ++col) { + num_nonzeros += static_cast( + source->at(batch, row, col) != zero()); + } + } + result[batch] = num_nonzeros; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL); + + +template +void calculate_max_nnz_per_row( + std::shared_ptr, + const matrix::BatchDense* const source, size_type* const result) +{ +#pragma omp parallel for + for (size_type batch = 0; batch < source->get_num_batch_entries(); + ++batch) { + auto num_rows = source->get_size().at(batch)[0]; + auto num_cols = source->get_size().at(batch)[1]; + size_type num_stored_elements_per_row = 0; + size_type num_nonzeros = 0; + + for (size_type row = 0; row < num_rows; ++row) { + num_nonzeros = 0; + for (size_type col = 0; col < num_cols; ++col) { + num_nonzeros += static_cast( + source->at(batch, row, col) != zero()); + } + num_stored_elements_per_row = + std::max(num_nonzeros, num_stored_elements_per_row); + } + result[batch] = num_stored_elements_per_row; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row( + std::shared_ptr, + const matrix::BatchDense* const source, + array* const result) +{ + size_type cumul_prev_rows = 0; + for (size_type batch = 0; batch < source->get_num_batch_entries(); + ++batch) { + auto num_rows = source->get_size().at(batch)[0]; + auto num_cols = source->get_size().at(batch)[1]; + auto row_nnz_val = result->get_data() + cumul_prev_rows; + +#pragma omp parallel for reduction(+ : cumul_prev_rows) + for (size_type row = 0; row < num_rows; ++row) { + size_type num_nonzeros = 0; + + for (size_type col = 0; col < num_cols; ++col) { + num_nonzeros += static_cast( + source->at(batch, row, col) != zero()); + } + row_nnz_val[row] = num_nonzeros; + ++cumul_prev_rows; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void calculate_total_cols(std::shared_ptr, + const matrix::BatchDense* const source, + size_type* const result, + const size_type* const stride_factor, + const size_type* const slice_size) +{ +#pragma omp parallel for + for (size_type batch = 0; batch < source->get_num_batch_entries(); + ++batch) { + auto num_rows = source->get_size().at(batch)[0]; + auto num_cols = source->get_size().at(batch)[1]; + auto slice_num = ceildiv(num_rows, slice_size[batch]); + size_type total_cols = 0; + size_type temp = 0; + size_type slice_temp = 0; + + for (size_type slice = 0; slice < 
slice_num; slice++) { + slice_temp = 0; + for (size_type row = 0; row < slice_size[batch] && + row + slice * slice_size[batch] < num_rows; + row++) { + temp = 0; + for (size_type col = 0; col < num_cols; col++) { + temp += static_cast( + source->at(batch, row + slice * slice_size[batch], + col) != zero()); + } + slice_temp = (slice_temp < temp) ? temp : slice_temp; + } + slice_temp = ceildiv(slice_temp, stride_factor[batch]) * + stride_factor[batch]; + total_cols += slice_temp; + } + result[batch] = total_cols; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void transpose(std::shared_ptr, + const matrix::BatchDense* const orig, + matrix::BatchDense* const trans) +{ +#pragma omp parallel for + for (size_type batch = 0; batch < orig->get_num_batch_entries(); ++batch) { + for (size_type i = 0; i < orig->get_size().at(batch)[0]; ++i) { + for (size_type j = 0; j < orig->get_size().at(batch)[1]; ++j) { + trans->at(batch, j, i) = orig->at(batch, i, j); + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr, + const matrix::BatchDense* const orig, + matrix::BatchDense* const trans) +{ +#pragma omp parallel for + for (size_type batch = 0; batch < orig->get_num_batch_entries(); ++batch) { + for (size_type i = 0; i < orig->get_size().at(batch)[0]; ++i) { + for (size_type j = 0; j < orig->get_size().at(batch)[1]; ++j) { + trans->at(batch, j, i) = conj(orig->at(batch, i, j)); + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL); + + +template +void copy(std::shared_ptr exec, + const matrix::BatchDense* x, + matrix::BatchDense* result) +{ + const auto x_ub = host::get_batch_struct(x); + const auto result_ub = host::get_batch_struct(result); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { + const auto result_b = gko::batch::batch_entry(result_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + copy(x_b, result_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COPY_KERNEL); + + +template +void convergence_copy(std::shared_ptr exec, + const matrix::BatchDense* x, + matrix::BatchDense* result, + const uint32& converged) +{ + const auto x_ub = host::get_batch_struct(x); + const auto result_ub = host::get_batch_struct(result); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { + const auto result_b = gko::batch::batch_entry(result_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + copy(x_b, result_b, converged); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL); + + +template +void batch_scale(std::shared_ptr exec, + const matrix::BatchDiagonal* const left, + const matrix::BatchDiagonal* const rght, + matrix::BatchDense* const vecs) +{ + const auto left_vals = left->get_const_values(); + const auto rght_vals = rght->get_const_values(); + const auto v_vals = vecs->get_values(); + const auto nrows = static_cast(vecs->get_size().at(0)[0]); + const auto ncols = static_cast(vecs->get_size().at(0)[1]); + const auto vstride = vecs->get_stride().at(0); +#pragma omp parallel for + for (size_type batch = 0; batch < vecs->get_num_batch_entries(); ++batch) { + const auto left_b = + gko::batch::batch_entry_ptr(left_vals, 1, nrows, batch); + const auto rght_b = + gko::batch::batch_entry_ptr(rght_vals, 
1, ncols, batch); + const auto v_b = + gko::batch::batch_entry_ptr(v_vals, vstride, nrows, batch); + batch_scale(nrows, ncols, vstride, left_b, rght_b, v_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL); + + +template +void add_scaled_identity(std::shared_ptr exec, + const matrix::BatchDense* const a, + const matrix::BatchDense* const b, + matrix::BatchDense* const mtx) +{ + const auto a_ub = host::get_batch_struct(a); + const auto b_ub = host::get_batch_struct(b); + const auto mtx_ub = host::get_batch_struct(mtx); +#pragma omp parallel for + for (size_type batch = 0; batch < mtx->get_num_batch_entries(); ++batch) { + auto a_b = gko::batch::batch_entry(a_ub, batch); + auto b_b = gko::batch::batch_entry(b_ub, batch); + auto mtx_b = gko::batch::batch_entry(mtx_ub, batch); + add_scaled_identity(a_b.values[0], b_b.values[0], mtx_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL); + + +} // namespace batch_dense +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index ab04aec75a1..224fb70dc0e 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -23,6 +23,7 @@ target_sources(ginkgo_reference factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp + matrix/batch_vector_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/reference/matrix/batch_vector_kernels.cpp b/reference/matrix/batch_vector_kernels.cpp new file mode 100644 index 00000000000..8e9e857cc5b --- /dev/null +++ b/reference/matrix/batch_vector_kernels.cpp @@ -0,0 +1,580 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The BatchDense matrix format namespace. + * @ref BatchDense + * @ingroup batch_dense + */ +namespace batch_dense { + + +#include "reference/matrix/batch_dense_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const matrix::BatchDense* const a, + const matrix::BatchDense* const b, + matrix::BatchDense* const c) +{ + const auto a_ub = host::get_batch_struct(a); + const auto b_ub = host::get_batch_struct(b); + const auto c_ub = host::get_batch_struct(c); + for (size_type batch = 0; batch < c->get_num_batch_entries(); ++batch) { + const auto a_b = gko::batch::batch_entry(a_ub, batch); + const auto b_b = gko::batch::batch_entry(b_ub, batch); + const auto c_b = gko::batch::batch_entry(c_ub, batch); + matvec_kernel(a_b, b_b, c_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void apply(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const a, + const matrix::BatchDense* const b, + const matrix::BatchDense* const beta, + matrix::BatchDense* const c) +{ + const auto a_ub = host::get_batch_struct(a); + const auto b_ub = host::get_batch_struct(b); + const auto c_ub = host::get_batch_struct(c); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); + for (size_type batch = 0; batch < c->get_num_batch_entries(); ++batch) { + const auto a_b = gko::batch::batch_entry(a_ub, batch); + const auto b_b = gko::batch::batch_entry(b_ub, batch); + const auto c_b = gko::batch::batch_entry(c_ub, batch); + const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); + const auto beta_b = gko::batch::batch_entry(beta_ub, batch); + advanced_matvec_kernel(alpha_b.values[0], a_b, b_b, beta_b.values[0], + c_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL); + + +template +void scale(std::shared_ptr exec, + const matrix::BatchDense* alpha, + matrix::BatchDense* x) +{ + const auto x_ub = host::get_batch_struct(x); + const auto alpha_ub = host::get_batch_struct(alpha); + for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { + const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + scale(alpha_b, x_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL); + + +template +void add_scaled(std::shared_ptr exec, + const matrix::BatchDense* alpha, + const matrix::BatchDense* x, + matrix::BatchDense* y) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto alpha_ub = host::get_batch_struct(alpha); + for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { + const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + add_scaled(alpha_b, x_b, y_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL); + + +template +void add_scale(std::shared_ptr exec, + const matrix::BatchDense* const alpha, + const matrix::BatchDense* const x, + 
const matrix::BatchDense* const beta, + matrix::BatchDense* const y) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); + for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { + const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); + const auto beta_b = gko::batch::batch_entry(beta_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + add_scale(alpha_b, x_b, beta_b, y_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL); + + +template +void convergence_add_scaled(std::shared_ptr exec, + const matrix::BatchDense* alpha, + const matrix::BatchDense* x, + matrix::BatchDense* y, + const uint32& converged) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto alpha_ub = host::get_batch_struct(alpha); + for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { + const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + add_scaled(alpha_b, x_b, y_b, converged); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL); + + +template +void add_scaled_diag(std::shared_ptr exec, + const matrix::BatchDense* alpha, + const matrix::Diagonal* x, + matrix::BatchDense* y) GKO_NOT_IMPLEMENTED; +// { +// for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { +// const auto diag_values = x->get_const_values(); +// for (size_type i = 0; i < x->get_size().at(batch)[0]; i++) { +// y->at(batch,i, i) += alpha->at(batch,0, 0) * diag_values[i]; +// } +// } +// } + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL); + + +template +void compute_dot(std::shared_ptr exec, + const matrix::BatchDense* x, + const matrix::BatchDense* y, + matrix::BatchDense* result) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto res_ub = host::get_batch_struct(result); + for (size_type batch = 0; batch < result->get_num_batch_entries(); + ++batch) { + const auto res_b = gko::batch::batch_entry(res_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + compute_dot_product(x_b, y_b, res_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL); + + +template +void convergence_compute_dot(std::shared_ptr exec, + const matrix::BatchDense* x, + const matrix::BatchDense* y, + matrix::BatchDense* result, + const uint32& converged) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto res_ub = host::get_batch_struct(result); + for (size_type batch = 0; batch < result->get_num_batch_entries(); + ++batch) { + const auto res_b = gko::batch::batch_entry(res_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + compute_dot_product(x_b, y_b, res_b, converged); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL); + + +template +void compute_norm2(std::shared_ptr exec, + const matrix::BatchDense* x, + matrix::BatchDense>* result) 
+{ + const auto x_ub = host::get_batch_struct(x); + const auto res_ub = host::get_batch_struct(result); + for (size_type batch = 0; batch < result->get_num_batch_entries(); + ++batch) { + const auto res_b = gko::batch::batch_entry(res_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + compute_norm2(x_b, res_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL); + + +template +void convergence_compute_norm2( + std::shared_ptr exec, + const matrix::BatchDense* x, + matrix::BatchDense>* result, + const uint32& converged) +{ + const auto x_ub = host::get_batch_struct(x); + const auto res_ub = host::get_batch_struct(result); + for (size_type batch = 0; batch < result->get_num_batch_entries(); + ++batch) { + const auto res_b = gko::batch::batch_entry(res_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + compute_norm2(x_b, res_b, converged); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL); + + +template +void convert_to_batch_csr(std::shared_ptr exec, + const matrix::BatchDense* source, + matrix::BatchCsr* result) +{ + GKO_ASSERT(source->get_size().stores_equal_sizes() == true); + auto num_rows = result->get_size().at(0)[0]; + auto num_cols = result->get_size().at(0)[1]; + auto num_batch_entries = result->get_num_batch_entries(); + + auto row_ptrs = result->get_row_ptrs(); + auto col_idxs = result->get_col_idxs(); + auto values = result->get_values(); + + size_type cur_ptr = 0; + row_ptrs[0] = cur_ptr; + for (size_type row = 0; row < num_rows; ++row) { + for (size_type col = 0; col < num_cols; ++col) { + auto val = source->at(0, row, col); + if (val != zero()) { + col_idxs[cur_ptr] = col; + ++cur_ptr; + } + } + row_ptrs[row + 1] = cur_ptr; + } + + cur_ptr = 0; + for (size_type batch = 0; batch < num_batch_entries; ++batch) { + for (size_type row = 0; row < num_rows; ++row) { + for (size_type col = 0; col < num_cols; ++col) { + auto val = source->at(batch, row, col); + if (val != zero()) { + values[cur_ptr] = val; + ++cur_ptr; + } + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_AND_INT32_INDEX( + GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL); + + +template +void count_nonzeros(std::shared_ptr exec, + const matrix::BatchDense* source, + size_type* result) +{ + for (size_type batch = 0; batch < source->get_num_batch_entries(); + ++batch) { + auto num_rows = source->get_size().at(batch)[0]; + auto num_cols = source->get_size().at(batch)[1]; + auto num_nonzeros = 0; + + for (size_type row = 0; row < num_rows; ++row) { + for (size_type col = 0; col < num_cols; ++col) { + num_nonzeros += + (source->at(batch, row, col) != zero()); + } + } + result[batch] = num_nonzeros; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL); + + +template +void calculate_max_nnz_per_row(std::shared_ptr exec, + const matrix::BatchDense* source, + size_type* result) +{ + for (size_type batch = 0; batch < source->get_num_batch_entries(); + ++batch) { + auto num_rows = source->get_size().at(batch)[0]; + auto num_cols = source->get_size().at(batch)[1]; + size_type num_stored_elements_per_row = 0; + size_type num_nonzeros = 0; + for (size_type row = 0; row < num_rows; ++row) { + num_nonzeros = 0; + for (size_type col = 0; col < num_cols; ++col) { + num_nonzeros += + (source->at(batch, row, col) != zero()); + } + num_stored_elements_per_row = + std::max(num_nonzeros, num_stored_elements_per_row); + } + result[batch] = 
num_stored_elements_per_row; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); + + +template +void calculate_nonzeros_per_row(std::shared_ptr exec, + const matrix::BatchDense* source, + array* result) +{ + for (size_type batch = 0; batch < source->get_num_batch_entries(); + ++batch) { + auto num_rows = source->get_size().at(batch)[0]; + auto num_cols = source->get_size().at(batch)[1]; + auto row_nnz_val = result->get_data(); + size_type offset = 0; + for (size_type row = 0; row < num_rows; ++row) { + size_type num_nonzeros = 0; + for (size_type col = 0; col < num_cols; ++col) { + num_nonzeros += + (source->at(batch, row, col) != zero()); + } + row_nnz_val[offset + row] = num_nonzeros; + ++offset; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL); + + +template +void calculate_total_cols(std::shared_ptr exec, + const matrix::BatchDense* const source, + size_type* const result, + const size_type* const stride_factor, + const size_type* const slice_size) +{ + for (size_type batch = 0; batch < source->get_num_batch_entries(); + ++batch) { + auto num_rows = source->get_size().at(batch)[0]; + auto num_cols = source->get_size().at(batch)[1]; + auto slice_num = ceildiv(num_rows, slice_size[batch]); + auto total_cols = 0; + auto temp = 0, slice_temp = 0; + for (size_type slice = 0; slice < slice_num; slice++) { + slice_temp = 0; + for (size_type row = 0; row < slice_size[batch] && + row + slice * slice_size[batch] < num_rows; + row++) { + temp = 0; + for (size_type col = 0; col < num_cols; col++) { + temp += (source->at(batch, row + slice * slice_size[batch], + col) != zero()); + } + slice_temp = (slice_temp < temp) ? temp : slice_temp; + } + slice_temp = ceildiv(slice_temp, stride_factor[batch]) * + stride_factor[batch]; + total_cols += slice_temp; + } + result[batch] = total_cols; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL); + + +template +void transpose(std::shared_ptr exec, + const matrix::BatchDense* const orig, + matrix::BatchDense* const trans) +{ + for (size_type batch = 0; batch < orig->get_num_batch_entries(); ++batch) { + for (size_type i = 0; i < orig->get_size().at(batch)[0]; ++i) { + for (size_type j = 0; j < orig->get_size().at(batch)[1]; ++j) { + trans->at(batch, j, i) = orig->at(batch, i, j); + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL); + + +template +void conj_transpose(std::shared_ptr exec, + const matrix::BatchDense* orig, + matrix::BatchDense* trans) +{ + for (size_type batch = 0; batch < orig->get_num_batch_entries(); ++batch) { + for (size_type i = 0; i < orig->get_size().at(batch)[0]; ++i) { + for (size_type j = 0; j < orig->get_size().at(batch)[1]; ++j) { + trans->at(batch, j, i) = conj(orig->at(batch, i, j)); + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL); + + +template +void copy(std::shared_ptr exec, + const matrix::BatchDense* x, + matrix::BatchDense* result) +{ + const auto x_ub = host::get_batch_struct(x); + const auto result_ub = host::get_batch_struct(result); + for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { + const auto result_b = gko::batch::batch_entry(result_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + copy(x_b, result_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COPY_KERNEL); + + +template +void 
convergence_copy(std::shared_ptr exec, + const matrix::BatchDense* x, + matrix::BatchDense* result, + const uint32& converged) +{ + const auto x_ub = host::get_batch_struct(x); + const auto result_ub = host::get_batch_struct(result); + for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { + const auto result_b = gko::batch::batch_entry(result_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + copy(x_b, result_b, converged); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL); + + +template +void batch_scale(std::shared_ptr exec, + const matrix::BatchDiagonal* const left, + const matrix::BatchDiagonal* const rght, + matrix::BatchDense* const vecs) +{ + const auto left_vals = left->get_const_values(); + const auto rght_vals = rght->get_const_values(); + const auto v_vals = vecs->get_values(); + const auto nrows = static_cast(vecs->get_size().at(0)[0]); + const auto ncols = static_cast(vecs->get_size().at(0)[1]); + const auto vstride = vecs->get_stride().at(0); + for (size_type batch = 0; batch < vecs->get_num_batch_entries(); ++batch) { + const auto left_b = + gko::batch::batch_entry_ptr(left_vals, 1, nrows, batch); + const auto rght_b = + gko::batch::batch_entry_ptr(rght_vals, 1, ncols, batch); + const auto v_b = + gko::batch::batch_entry_ptr(v_vals, vstride, nrows, batch); + batch_scale(nrows, ncols, vstride, left_b, rght_b, v_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL); + + +template +void add_scaled_identity(std::shared_ptr exec, + const matrix::BatchDense* const a, + const matrix::BatchDense* const b, + matrix::BatchDense* const mtx) +{ + const auto a_ub = host::get_batch_struct(a); + const auto b_ub = host::get_batch_struct(b); + const auto mtx_ub = host::get_batch_struct(mtx); + for (size_type batch = 0; batch < mtx->get_num_batch_entries(); ++batch) { + auto a_b = gko::batch::batch_entry(a_ub, batch); + auto b_b = gko::batch::batch_entry(b_ub, batch); + auto mtx_b = gko::batch::batch_entry(mtx_ub, batch); + add_scaled_identity(a_b.values[0], b_b.values[0], mtx_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL); + + +} // namespace batch_dense +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/matrix/batch_vector_kernels.hpp.inc b/reference/matrix/batch_vector_kernels.hpp.inc new file mode 100644 index 00000000000..db828206239 --- /dev/null +++ b/reference/matrix/batch_vector_kernels.hpp.inc @@ -0,0 +1,392 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +template +inline void matvec_kernel( + const gko::batch_dense::BatchEntry& a, + const gko::batch_dense::BatchEntry& b, + const gko::batch_dense::BatchEntry& c) +{ + for (int row = 0; row < c.num_rows; ++row) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] = gko::zero(); + } + } + + for (int row = 0; row < c.num_rows; ++row) { + for (int inner = 0; inner < a.num_rhs; ++inner) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] += + a.values[row * a.stride + inner] * + b.values[inner * b.stride + col]; + } + } + } +} + + +template +inline void advanced_matvec_kernel( + const ValueType alpha, + const gko::batch_dense::BatchEntry& a, + const gko::batch_dense::BatchEntry& b, + const ValueType beta, const gko::batch_dense::BatchEntry& c) +{ + if (beta != gko::zero()) { + for (int row = 0; row < c.num_rows; ++row) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] *= beta; + } + } + } else { + for (int row = 0; row < c.num_rows; ++row) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] *= gko::zero(); + } + } + } + + for (int row = 0; row < c.num_rows; ++row) { + for (int inner = 0; inner < a.num_rhs; ++inner) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] += + alpha * a.values[row * a.stride + inner] * + b.values[inner * b.stride + col]; + } + } + } +} + + +template +inline void scale(const gko::batch_dense::BatchEntry& alpha, + const gko::batch_dense::BatchEntry& x) +{ + if (alpha.num_rhs == 1) { + for (int i = 0; i < x.num_rows; ++i) { + for (int j = 0; j < x.num_rhs; ++j) { + x.values[i * x.stride + j] *= alpha.values[0]; + } + } + } else { + for (int i = 0; i < x.num_rows; ++i) { + for (int j = 0; j < x.num_rhs; ++j) { + x.values[i * x.stride + j] *= alpha.values[j]; + } + } + } +} + + +template +inline void add_scaled( + const gko::batch_dense::BatchEntry& alpha, + const gko::batch_dense::BatchEntry& x, + const gko::batch_dense::BatchEntry& y) +{ + if (alpha.num_rhs == 1) { + for (int i = 0; i < x.num_rows; ++i) { + for (int j = 0; j < x.num_rhs; ++j) { + y.values[i * y.stride + j] += + alpha.values[0] * x.values[i * x.stride + j]; + } + } + } else { + for (int i = 0; i < x.num_rows; ++i) { + for (int j = 0; j < x.num_rhs; ++j) { + y.values[i * y.stride + j] += + alpha.values[j] * x.values[i * x.stride + j]; + } + } + } +} + + +template +inline void add_scale( + const gko::batch_dense::BatchEntry& alpha, + const gko::batch_dense::BatchEntry& x, + const gko::batch_dense::BatchEntry& beta, + const gko::batch_dense::BatchEntry& y) +{ + if (alpha.num_rhs == 1) { + for (int i = 0; i < x.num_rows; ++i) { + for (int j = 0; j 
< x.num_rhs; ++j) { + y.values[i * y.stride + j] = + alpha.values[0] * x.values[i * x.stride + j] + + beta.values[0] * y.values[i * y.stride + j]; + } + } + } else { + for (int i = 0; i < x.num_rows; ++i) { + for (int j = 0; j < x.num_rhs; ++j) { + y.values[i * y.stride + j] = + alpha.values[j] * x.values[i * x.stride + j] + + beta.values[j] * y.values[i * y.stride + j]; + } + } + } +} + + +template +inline void compute_norm2( + const gko::batch_dense::BatchEntry& x, + const gko::batch_dense::BatchEntry>& result) +{ + for (int j = 0; j < x.num_rhs; ++j) { + result.values[j] = gko::zero>(); + } + for (int i = 0; i < x.num_rows; ++i) { + for (int j = 0; j < x.num_rhs; ++j) { + result.values[j] += squared_norm(x.values[i * x.stride + j]); + } + } + for (int j = 0; j < x.num_rhs; ++j) { + result.values[j] = sqrt(result.values[j]); + } +} + + +/** + * Multiplies with a diagonal matrix represented as a dense vector. + * + * @param[in] diag_vec The entries of the diagonal matrix. + * @param[in,out] a The dense matrix or vectors to scale. + */ +template +inline void batch_scale( + const gko::batch_dense::BatchEntry& diag_vec, + const gko::batch_dense::BatchEntry& a) +{ + for (int i_row = 0; i_row < a.num_rows; i_row++) { + const ValueType scale = diag_vec.values[i_row]; + for (int j = 0; j < a.num_rhs; j++) { + a.values[i_row * a.stride + j] *= scale; + } + } +} + +template +inline void batch_scale(const int nrows, const int ncols, + const size_type a_stride, const ValueType* const left, + const ValueType* const right, ValueType* const a) +{ + for (int i_row = 0; i_row < nrows; i_row++) { + const ValueType scale = left[i_row]; + for (int j = 0; j < ncols; j++) { + a[i_row * a_stride + j] *= scale * right[j]; + } + } +} + + +/** + * Copies the values of one multi-vector into another. + * + * Note that the output multi-vector should already have memory allocated + * and stride set. 
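+ *
+ * @param[in]  in   the batch entry (multi-vector view) to copy from
+ * @param[out] out  the batch entry to copy into; it is assumed to have the
+ *                  same number of rows and right-hand sides as `in`, while
+ *                  the strides of the two views may differ.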
+ */ +template +inline void copy(const gko::batch_dense::BatchEntry& in, + const gko::batch_dense::BatchEntry& out) +{ + for (int iz = 0; iz < in.num_rows * in.num_rhs; iz++) { + const int i = iz / in.num_rhs; + const int j = iz % in.num_rhs; + out.values[i * out.stride + j] = in.values[i * in.stride + j]; + } +} + + +template +inline void compute_dot_product( + const gko::batch_dense::BatchEntry& x, + const gko::batch_dense::BatchEntry& y, + const gko::batch_dense::BatchEntry& result) +{ + for (int c = 0; c < result.num_rhs; c++) { + result.values[c] = gko::zero(); + } + + for (int r = 0; r < x.num_rows; r++) { + for (int c = 0; c < x.num_rhs; c++) { + result.values[c] += + conj(x.values[r * x.stride + c]) * y.values[r * y.stride + c]; + } + } +} + + +template +inline void copy( + const gko::batch_dense::BatchEntry& source_entry, + const gko::batch_dense::BatchEntry& destination_entry, + const gko::uint32& converged) +{ + for (int r = 0; r < source_entry.num_rows; r++) { + for (int c = 0; c < source_entry.num_rhs; c++) { + const gko::uint32 conv = converged & (1 << c); + + if (conv) { + continue; + } + + destination_entry.values[r * destination_entry.stride + c] = + source_entry.values[r * source_entry.stride + c]; + } + } +} + + +template +inline void add_scaled( + const gko::batch_dense::BatchEntry& alpha, + const gko::batch_dense::BatchEntry& x, + const gko::batch_dense::BatchEntry& y, + const gko::uint32& converged) +{ + if (alpha.num_rhs == 1) { + for (int i = 0; i < x.num_rows; ++i) { + for (int j = 0; j < x.num_rhs; ++j) { + const gko::uint32 conv = converged & (1 << j); + + if (conv) { + continue; + } + + y.values[i * y.stride + j] += + alpha.values[0] * x.values[i * x.stride + j]; + } + } + } else { + for (int i = 0; i < x.num_rows; ++i) { + for (int j = 0; j < x.num_rhs; ++j) { + const gko::uint32 conv = converged & (1 << j); + + if (conv) { + continue; + } + + + y.values[i * y.stride + j] += + alpha.values[j] * x.values[i * x.stride + j]; + } + } + } +} + + +template +inline void compute_norm2( + const gko::batch_dense::BatchEntry& x, + const gko::batch_dense::BatchEntry>& result, + const gko::uint32& converged) +{ + for (int j = 0; j < x.num_rhs; ++j) { + const gko::uint32 conv = converged & (1 << j); + + if (conv) { + continue; + } + + result.values[j] = gko::zero>(); + } + for (int i = 0; i < x.num_rows; ++i) { + for (int j = 0; j < x.num_rhs; ++j) { + const gko::uint32 conv = converged & (1 << j); + + if (conv) { + continue; + } + + result.values[j] += squared_norm(x.values[i * x.stride + j]); + } + } + for (int j = 0; j < x.num_rhs; ++j) { + const gko::uint32 conv = converged & (1 << j); + + if (conv) { + continue; + } + + result.values[j] = sqrt(result.values[j]); + } +} + + +template +inline void compute_dot_product( + const gko::batch_dense::BatchEntry& x, + const gko::batch_dense::BatchEntry& y, + const gko::batch_dense::BatchEntry& result, + const gko::uint32& converged) +{ + for (int c = 0; c < result.num_rhs; c++) { + const gko::uint32 conv = converged & (1 << c); + + if (conv) { + continue; + } + + result.values[c] = gko::zero(); + } + + for (int r = 0; r < x.num_rows; r++) { + for (int c = 0; c < x.num_rhs; c++) { + const gko::uint32 conv = converged & (1 << c); + + if (conv) { + continue; + } + + result.values[c] += + conj(x.values[r * x.stride + c]) * y.values[r * y.stride + c]; + } + } +} + + +template +inline void add_scaled_identity( + const ValueType& a, const ValueType& b, + const gko::batch_dense::BatchEntry& mat) +{ + for (int i = 0; i < mat.num_rows; 
i++) { + for (int j = 0; j < mat.num_rhs; j++) { + mat.values[i * mat.stride + j] *= b; + if (i == j) { + mat.values[i * mat.stride + i] += a; + } + } + } +} diff --git a/reference/test/matrix/batch_vector_kernels.cpp b/reference/test/matrix/batch_vector_kernels.cpp new file mode 100644 index 00000000000..6e1a6c2f8e1 --- /dev/null +++ b/reference/test/matrix/batch_vector_kernels.cpp @@ -0,0 +1,1023 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_dense_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +template +class BatchDense : public ::testing::Test { +protected: + using value_type = T; + using size_type = gko::size_type; + using Mtx = gko::matrix::BatchDense; + using DenseMtx = gko::matrix::Dense; + using ComplexMtx = gko::to_complex; + using RealMtx = gko::remove_complex; + BatchDense() + : exec(gko::ReferenceExecutor::create()), + mtx_0(gko::batch_initialize( + {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, + {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, + exec)), + mtx_00(gko::initialize( + {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), + mtx_01(gko::initialize( + {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), + mtx_1( + gko::batch_initialize(std::vector{4, 4}, + {{{1.0, -1.0, 2.2}, {-2.0, 2.0, -0.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)), + mtx_10(gko::initialize( + {I({1.0, -1.0, 2.2}), I({-2.0, 2.0, -0.5})}, exec)), + mtx_11(gko::initialize( + 4, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)), + mtx_2(gko::batch_initialize( + std::vector{2, 2}, + {{{1.0, 1.5}, {6.0, 1.0}, {-0.25, 1.0}}, + {I({2.0, -2.0}), I({1.0, 3.0}), I({4.0, 3.0})}}, + exec)), + mtx_20(gko::initialize( + 4, {I({1.0, 1.5}), I({6.0, 1.0}), I({-0.25, 1.0})}, + exec)), + mtx_21(gko::initialize( + {I({2.0, -2.0}), I({1.0, 3.0}), I({4.0, 3.0})}, exec)), + mtx_3(gko::batch_initialize( + std::vector{4, 4}, + {{I({1.0, 1.5}), I({6.0, 1.0})}, {{2.0, -2.0}, {1.0, 3.0}}}, + exec)), + mtx_30(gko::initialize({I({1.0, 1.5}), I({6.0, 1.0})}, + exec)), + mtx_31(gko::initialize( + {I({2.0, -2.0}), I({1.0, 3.0})}, exec)), + mtx_4(gko::batch_initialize( + {{{1.0, 1.5, 3.0}, {6.0, 1.0, 5.0}, {6.0, 1.0, 5.5}}, + {{2.0, -2.0, 1.5}, {4.0, 3.0, 2.2}, {-1.25, 3.0, 0.5}}}, + exec)), + mtx_5(gko::batch_initialize( + {{{1.0, 1.5}, {6.0, 1.0}, {7.0, -4.5}}, + {I({2.0, -2.0}), I({1.0, 3.0}), I({4.0, 3.0})}}, + exec)), + mtx_6(gko::batch_initialize( + {{{1.0, 0.0, 3.0}, {0.0, 3.0, 0.0}, {0.0, 1.0, 5.0}}, + {{2.0, 0.0, 5.0}, {0.0, 1.0, 0.0}, {0.0, -1.0, 8.0}}}, + exec)) + {} + + std::shared_ptr exec; + std::unique_ptr mtx_0; + std::unique_ptr mtx_00; + std::unique_ptr mtx_01; + std::unique_ptr mtx_1; + std::unique_ptr mtx_10; + std::unique_ptr mtx_11; + std::unique_ptr mtx_2; + std::unique_ptr mtx_20; + std::unique_ptr mtx_21; + std::unique_ptr mtx_3; + std::unique_ptr mtx_30; + std::unique_ptr mtx_31; + std::unique_ptr mtx_4; + std::unique_ptr mtx_5; + std::unique_ptr mtx_6; + + std::ranlux48 rand_engine; +}; + + +TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); + + +TYPED_TEST(BatchDense, AppliesToBatchDense) +{ + using T = typename TestFixture::value_type; + this->mtx_1->apply(this->mtx_2.get(), this->mtx_3.get()); + this->mtx_10->apply(this->mtx_20.get(), this->mtx_30.get()); + this->mtx_11->apply(this->mtx_21.get(), this->mtx_31.get()); + + + auto res = this->mtx_3->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_30.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_31.get(), 0.); +} + + +TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchDense) +{ + using Mtx = typename TestFixture::Mtx; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch_initialize({{2.5}, {-4.0}}, 
this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({-1.0}, this->exec); + auto beta0 = gko::initialize({2.5}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_1->apply(alpha.get(), this->mtx_2.get(), beta.get(), + this->mtx_3.get()); + this->mtx_10->apply(alpha0.get(), this->mtx_20.get(), beta0.get(), + this->mtx_30.get()); + this->mtx_11->apply(alpha1.get(), this->mtx_21.get(), beta1.get(), + this->mtx_31.get()); + + auto res = this->mtx_3->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_30.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_31.get(), 0.); +} + + +TYPED_TEST(BatchDense, ApplyFailsOnWrongInnerDimension) +{ + using Mtx = typename TestFixture::Mtx; + auto res = Mtx::create( + this->exec, std::vector>{gko::dim<2>{2}, gko::dim<2>{2}}); + + ASSERT_THROW(this->mtx_2->apply(this->mtx_1.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, ApplyFailsForNonUniformBatches) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto mat1 = gko::batch_initialize( + std::vector{4, 4}, + {{I({1.0, -1.0}), I({1.0, -1.0}), I({2.0, -0.5})}, + {{1.0, 2.5, 3.0}, {1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + this->exec); + auto mat2 = gko::batch_initialize( + std::vector{4, 4}, + {{{1.0, -1.0, 2.2}, {-2.0, 2.0, -0.5}}, + {{1.0, 2.5, -3.0}, {1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + this->exec); + auto res = Mtx::create( + this->exec, std::vector>{gko::dim<2>{2}, gko::dim<2>{3}}); + + ASSERT_THROW(mat2->apply(mat1.get(), res.get()), gko::NotImplemented); +} + + +TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfRows) +{ + using Mtx = typename TestFixture::Mtx; + auto res = Mtx::create( + this->exec, std::vector>{gko::dim<2>{3}, gko::dim<2>{3}}); + + ASSERT_THROW(this->mtx_1->apply(this->mtx_2.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfCols) +{ + using Mtx = typename TestFixture::Mtx; + auto res = Mtx::create( + this->exec, + std::vector>{gko::dim<2>{2, 1}, gko::dim<2>{2, 1}}, + std::vector{3, 3}); + + + ASSERT_THROW(this->mtx_1->apply(this->mtx_2.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, ScalesData) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize( + std::vector{3, 3}, + {{{2.0, -2.0, 1.5}}, {{3.0, -1.0, 0.25}}}, this->exec); + + auto ualpha = alpha->unbatch(); + + this->mtx_0->scale(alpha.get()); + this->mtx_00->scale(ualpha[0].get()); + this->mtx_01->scale(ualpha[1].get()); + + auto res = this->mtx_0->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_01.get(), 0.); +} + + +TYPED_TEST(BatchDense, ScalesDataWithScalar) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); + + auto ualpha = alpha->unbatch(); + + this->mtx_1->scale(alpha.get()); + this->mtx_10->scale(ualpha[0].get()); + this->mtx_11->scale(ualpha[1].get()); + + auto res = this->mtx_1->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); +} + + +TYPED_TEST(BatchDense, ScalesDataWithStride) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize( + {{{2.0, -2.0, -1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); + + auto 
ualpha = alpha->unbatch(); + + this->mtx_1->scale(alpha.get()); + this->mtx_10->scale(ualpha[0].get()); + this->mtx_11->scale(ualpha[1].get()); + + auto res = this->mtx_1->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); +} + + +TYPED_TEST(BatchDense, AddsScaled) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize( + {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); + + auto ualpha = alpha->unbatch(); + + this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); + this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); + this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); + + auto res = this->mtx_1->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); +} + + +TYPED_TEST(BatchDense, AddsScale) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize( + {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); + auto beta = gko::batch_initialize( + {{{-1.0, 3.0, 0.5}}, {{1.5, 0.5, -4.0}}}, this->exec); + + auto ualpha = alpha->unbatch(); + auto ubeta = beta->unbatch(); + + this->mtx_1->add_scale(alpha.get(), this->mtx_0.get(), beta.get()); + this->mtx_10->add_scale(ualpha[0].get(), this->mtx_00.get(), + ubeta[0].get()); + this->mtx_11->add_scale(ualpha[1].get(), this->mtx_01.get(), + ubeta[1].get()); + + auto res = this->mtx_1->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); +} + + +TYPED_TEST(BatchDense, ConvergenceAddScaled) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize( + {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); + + auto ualpha = alpha->unbatch(); + + + const int num_rhs = 3; + const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); + + gko::kernels::reference::batch_dense::convergence_add_scaled( + this->exec, alpha.get(), this->mtx_0.get(), this->mtx_1.get(), + converged); + + auto mtx_10_clone = gko::clone(this->mtx_10); + auto mtx_11_clone = gko::clone(this->mtx_11); + + this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); + this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); + + auto res = this->mtx_1->unbatch(); + + EXPECT_EQ(res[0]->at(0, 0), mtx_10_clone->at(0, 0)); + EXPECT_EQ(res[0]->at(1, 0), mtx_10_clone->at(1, 0)); + EXPECT_EQ(res[0]->at(0, 1), this->mtx_10->at(0, 1)); + EXPECT_EQ(res[0]->at(1, 1), this->mtx_10->at(1, 1)); + EXPECT_EQ(res[0]->at(0, 2), mtx_10_clone->at(0, 2)); + EXPECT_EQ(res[0]->at(1, 2), mtx_10_clone->at(1, 2)); + + EXPECT_EQ(res[1]->at(0, 0), mtx_11_clone->at(0, 0)); + EXPECT_EQ(res[1]->at(1, 0), mtx_11_clone->at(1, 0)); + EXPECT_EQ(res[1]->at(0, 1), this->mtx_11->at(0, 1)); + EXPECT_EQ(res[1]->at(1, 1), this->mtx_11->at(1, 1)); + EXPECT_EQ(res[1]->at(0, 2), mtx_11_clone->at(0, 2)); + EXPECT_EQ(res[1]->at(1, 2), mtx_11_clone->at(1, 2)); +} + + +TYPED_TEST(BatchDense, AddsScaledWithScalar) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); + + auto ualpha = alpha->unbatch(); + + this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); + this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); + 
this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); + + auto res = this->mtx_1->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); +} + + +TYPED_TEST(BatchDense, AddsScaleWithScalar) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); + auto beta = gko::batch_initialize({{-0.5}, {3.0}}, this->exec); + + auto ualpha = alpha->unbatch(); + auto ubeta = beta->unbatch(); + + this->mtx_1->add_scale(alpha.get(), this->mtx_0.get(), beta.get()); + this->mtx_10->add_scale(ualpha[0].get(), this->mtx_00.get(), + ubeta[0].get()); + this->mtx_11->add_scale(ualpha[1].get(), this->mtx_01.get(), + ubeta[1].get()); + + auto res = this->mtx_1->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); +} + + +TYPED_TEST(BatchDense, AddScaleWithScalarViaApply) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); + auto beta = gko::batch_initialize({{-0.5}, {3.0}}, this->exec); + auto id = gko::matrix::BatchIdentity::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(3, 3))); + auto ualpha = alpha->unbatch(); + auto ubeta = beta->unbatch(); + + this->mtx_0->apply(alpha.get(), id.get(), beta.get(), this->mtx_1.get()); + this->mtx_10->add_scale(ualpha[0].get(), this->mtx_00.get(), + ubeta[0].get()); + this->mtx_11->add_scale(ualpha[1].get(), this->mtx_01.get(), + ubeta[1].get()); + + auto res = this->mtx_1->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); +} + + +TYPED_TEST(BatchDense, ConvergenceAddScaledWithScalar) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); + + auto ualpha = alpha->unbatch(); + + + const int num_rhs = 3; + const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); + + gko::kernels::reference::batch_dense::convergence_add_scaled( + this->exec, alpha.get(), this->mtx_0.get(), this->mtx_1.get(), + converged); + + auto mtx_10_clone = gko::clone(this->mtx_10); + auto mtx_11_clone = gko::clone(this->mtx_11); + + this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); + this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); + + auto res = this->mtx_1->unbatch(); + + EXPECT_EQ(res[0]->at(0, 0), mtx_10_clone->at(0, 0)); + EXPECT_EQ(res[0]->at(1, 0), mtx_10_clone->at(1, 0)); + EXPECT_EQ(res[0]->at(0, 1), this->mtx_10->at(0, 1)); + EXPECT_EQ(res[0]->at(1, 1), this->mtx_10->at(1, 1)); + EXPECT_EQ(res[0]->at(0, 2), mtx_10_clone->at(0, 2)); + EXPECT_EQ(res[0]->at(1, 2), mtx_10_clone->at(1, 2)); + + EXPECT_EQ(res[1]->at(0, 0), mtx_11_clone->at(0, 0)); + EXPECT_EQ(res[1]->at(1, 0), mtx_11_clone->at(1, 0)); + EXPECT_EQ(res[1]->at(0, 1), this->mtx_11->at(0, 1)); + EXPECT_EQ(res[1]->at(1, 1), this->mtx_11->at(1, 1)); + EXPECT_EQ(res[1]->at(0, 2), mtx_11_clone->at(0, 2)); + EXPECT_EQ(res[1]->at(1, 2), mtx_11_clone->at(1, 2)); +} + + +TYPED_TEST(BatchDense, AddScaledFailsOnWrongSizes) +{ + using Mtx = typename TestFixture::Mtx; + auto alpha = + gko::batch_initialize({{2.0, 3.0, 4.0, 5.0}, {-2.0}}, this->exec); + + ASSERT_THROW(this->mtx_1->add_scaled(alpha.get(), this->mtx_2.get()), + gko::DimensionMismatch); +} + + 
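+// Note on the Convergence* tests in this file: the gko::uint32 mask passed to
+// the convergence_* kernels marks right-hand side (column) c as converged when
+// bit c is set, and the kernels then leave that column untouched, e.g.
+//
+//     for (int c = 0; c < num_rhs; c++) {
+//         if (converged & (1 << c)) {
+//             continue;  // column c already converged, keep its old value
+//         }
+//         /* update column c */
+//     }
+//
+// With num_rhs = 3, the mask 0xfffffffd | (0 - (1 << num_rhs)) clears only
+// bit 1, so column 1 is the single column still being updated, while columns
+// 0 and 2 keep their previous values, which is what the EXPECT_EQ checks
+// against the cloned reference results verify.
+
+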
+TYPED_TEST(BatchDense, AddScaleFailsOnWrongSizes) +{ + using Mtx = typename TestFixture::Mtx; + auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); + auto beta = gko::batch_initialize({{2.0}, {3.0}}, this->exec); + + ASSERT_THROW( + this->mtx_1->add_scale(alpha.get(), this->mtx_2.get(), beta.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, AddScaleFailsOnWrongScalarSizes) +{ + using Mtx = typename TestFixture::Mtx; + auto alpha = gko::batch_initialize( + {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); + auto beta = gko::batch_initialize({{3.0}, {1.5}}, this->exec); + + ASSERT_THROW( + this->mtx_1->add_scale(alpha.get(), this->mtx_0.get(), beta.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, ComputesDot) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto result = + Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); + + auto ures = result->unbatch(); + + this->mtx_0->compute_dot(this->mtx_1.get(), result.get()); + this->mtx_00->compute_dot(this->mtx_10.get(), ures[0].get()); + this->mtx_01->compute_dot(this->mtx_11.get(), ures[1].get()); + + auto res = result->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); +} + + +TYPED_TEST(BatchDense, ConvergenceComputeDot) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto result = + Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); + + for (int ibatch = 0; ibatch < result->get_size().get_batch_sizes().size(); + ibatch++) { + for (int icol = 0; icol < result->get_size().at()[1]; icol++) { + result->at(ibatch, 0, icol) = gko::zero(); + } + } + + auto ures = result->unbatch(); + + const int num_rhs = 3; + const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); + + gko::kernels::reference::batch_dense::convergence_compute_dot( + this->exec, this->mtx_0.get(), this->mtx_1.get(), result.get(), + converged); + + auto ures_00_clone = gko::clone(ures[0]); + auto ures_01_clone = gko::clone(ures[1]); + + this->mtx_00->compute_dot(this->mtx_10.get(), ures[0].get()); + this->mtx_01->compute_dot(this->mtx_11.get(), ures[1].get()); + + auto res = result->unbatch(); + + EXPECT_EQ(res[0]->at(0, 0), ures_00_clone->at(0, 0)); + EXPECT_EQ(res[0]->at(0, 1), ures[0]->at(0, 1)); + EXPECT_EQ(res[0]->at(0, 2), ures_00_clone->at(0, 2)); + + EXPECT_EQ(res[1]->at(0, 0), ures_01_clone->at(0, 0)); + EXPECT_EQ(res[1]->at(0, 1), ures[1]->at(0, 1)); + EXPECT_EQ(res[1]->at(0, 2), ures_01_clone->at(0, 2)); +} + + +TYPED_TEST(BatchDense, ComputesNorm2) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using T_nc = gko::remove_complex; + using NormVector = gko::matrix::BatchDense; + auto mtx(gko::batch_initialize( + {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, + {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, + this->exec)); + auto batch_size = gko::batch_dim<2>( + std::vector>{gko::dim<2>{1, 2}, gko::dim<2>{1, 2}}); + auto result = + NormVector::create(this->exec, batch_size, gko::batch_stride(2, 2)); + + mtx->compute_norm2(result.get()); + + EXPECT_EQ(result->at(0, 0, 0), T_nc{3.0}); + EXPECT_EQ(result->at(0, 0, 1), T_nc{5.0}); + EXPECT_EQ(result->at(1, 0, 0), T_nc{5.0}); + EXPECT_EQ(result->at(1, 0, 1), T_nc{3.0}); +} + + +TYPED_TEST(BatchDense, ConvergenceComputeNorm2) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using T_nc = gko::remove_complex; 
+ using NormVector = gko::matrix::BatchDense; + auto mtx(gko::batch_initialize( + {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, + {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, + this->exec)); + auto batch_size = gko::batch_dim<2>( + std::vector>{gko::dim<2>{1, 2}, gko::dim<2>{1, 2}}); + auto result = + NormVector::create(this->exec, batch_size, gko::batch_stride(2, 2)); + + for (int ibatch = 0; ibatch < result->get_size().get_batch_sizes().size(); + ibatch++) { + for (int icol = 0; icol < result->get_size().at()[1]; icol++) { + result->at(ibatch, 0, icol) = gko::zero(); + } + } + + auto result_clone = gko::clone(result); + + const int num_rhs = 2; + const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); + + gko::kernels::reference::batch_dense::convergence_compute_norm2( + this->exec, mtx.get(), result.get(), converged); + + EXPECT_EQ(result->at(0, 0, 0), result_clone->at(0, 0, 0)); + EXPECT_EQ(result->at(0, 0, 1), T_nc{5.0}); + + EXPECT_EQ(result->at(1, 0, 0), result_clone->at(1, 0, 0)); + EXPECT_EQ(result->at(1, 0, 1), T_nc{3.0}); +} + + +TYPED_TEST(BatchDense, ComputDotFailsOnWrongInputSize) +{ + using Mtx = typename TestFixture::Mtx; + auto result = + Mtx::create(this->exec, gko::batch_dim<2>(std::vector>{ + gko::dim<2>{1, 2}, gko::dim<2>{1, 3}})); + + ASSERT_THROW(this->mtx_1->compute_dot(this->mtx_2.get(), result.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, ComputDotFailsOnWrongResultSize) +{ + using Mtx = typename TestFixture::Mtx; + auto result = + Mtx::create(this->exec, gko::batch_dim<2>(std::vector>{ + gko::dim<2>{1, 2}, gko::dim<2>{1, 2}})); + auto result2 = + Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); + + ASSERT_THROW(this->mtx_0->compute_dot(this->mtx_1.get(), result.get()), + gko::DimensionMismatch); + ASSERT_THROW(this->mtx_0->compute_dot(this->mtx_1.get(), result2.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, CopiesData) +{ + gko::kernels::reference::batch_dense::copy(this->exec, this->mtx_0.get(), + this->mtx_1.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(this->mtx_1.get(), this->mtx_0.get(), 0.); +} + + +TYPED_TEST(BatchDense, ConvergenceCopyData) +{ + auto umtx_0 = this->mtx_0->unbatch(); + + const int num_rhs = 3; + const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); + gko::kernels::reference::batch_dense::convergence_copy( + this->exec, this->mtx_0.get(), this->mtx_1.get(), converged); + + auto mtx_10_clone = gko::clone(this->mtx_10); + auto mtx_11_clone = gko::clone(this->mtx_11); + + auto res = this->mtx_1->unbatch(); + + EXPECT_EQ(res[0]->at(0, 0), mtx_10_clone->at(0, 0)); + EXPECT_EQ(res[0]->at(1, 0), mtx_10_clone->at(1, 0)); + EXPECT_EQ(res[0]->at(0, 1), this->mtx_0->at(0, 0, 1)); + EXPECT_EQ(res[0]->at(1, 1), this->mtx_0->at(0, 1, 1)); + EXPECT_EQ(res[0]->at(0, 2), mtx_10_clone->at(0, 2)); + EXPECT_EQ(res[0]->at(1, 2), mtx_10_clone->at(1, 2)); + + EXPECT_EQ(res[1]->at(0, 0), mtx_11_clone->at(0, 0)); + EXPECT_EQ(res[1]->at(1, 0), mtx_11_clone->at(1, 0)); + EXPECT_EQ(res[1]->at(0, 1), this->mtx_0->at(1, 0, 1)); + EXPECT_EQ(res[1]->at(1, 1), this->mtx_0->at(1, 1, 1)); + EXPECT_EQ(res[1]->at(0, 2), mtx_11_clone->at(0, 2)); + EXPECT_EQ(res[1]->at(1, 2), mtx_11_clone->at(1, 2)); +} + + +TYPED_TEST(BatchDense, BatchScale) +{ + using T = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + using BDiag = gko::matrix::BatchDiagonal; + + auto mtx(gko::batch_initialize( + {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, + {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, + this->exec)); + 
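+    // batch_scale applies the two diagonal matrices from both sides: for each
+    // batch entry, row r of mtx is multiplied by left[r] and column c by
+    // rght[c], i.e. mtx_i <- diag(left_i) * mtx_i * diag(rght_i), matching the
+    // reference kernel that computes a[r][c] *= left[r] * rght[c].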
+ auto left(gko::batch_diagonal_initialize( + I>{I{1.0, 2.0, 3.0}, I{-1.0, -2.0, -3.0}}, this->exec)); + auto rght(gko::batch_diagonal_initialize( + I>{I{-0.5, -2.0}, I{2.0, 0.25}}, this->exec)); + + gko::kernels::reference::batch_dense::batch_scale(this->exec, left.get(), + rght.get(), mtx.get()); + + EXPECT_EQ(mtx->at(0, 0, 0), T{-0.5}); + EXPECT_EQ(mtx->at(0, 1, 0), T{-2.0}); + EXPECT_EQ(mtx->at(0, 2, 0), T{-3.0}); + EXPECT_EQ(mtx->at(0, 0, 1), T{0.0}); + EXPECT_EQ(mtx->at(0, 1, 1), T{-12.0}); + EXPECT_EQ(mtx->at(0, 2, 1), T{-24.0}); + + EXPECT_EQ(mtx->at(1, 0, 0), T{8.0}); + EXPECT_EQ(mtx->at(1, 1, 0), T{12.0}); + EXPECT_EQ(mtx->at(1, 2, 0), T{0.0}); + EXPECT_EQ(mtx->at(1, 0, 1), T{-0.5}); + EXPECT_EQ(mtx->at(1, 1, 1), T{1.0}); + EXPECT_EQ(mtx->at(1, 2, 1), T{-0.75}); +} + + +TYPED_TEST(BatchDense, ConvertsToPrecision) +{ + using BatchDense = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using OtherT = typename gko::next_precision; + using OtherBatchDense = typename gko::matrix::BatchDense; + auto tmp = OtherBatchDense::create(this->exec); + auto res = BatchDense::create(this->exec); + // If OtherT is more precise: 0, otherwise r + auto residual = r::value < r::value + ? gko::remove_complex{0} + : gko::remove_complex{r::value}; + + this->mtx_1->convert_to(tmp.get()); + tmp->convert_to(res.get()); + + auto ures = res->unbatch(); + auto umtx = this->mtx_1->unbatch(); + GKO_ASSERT_MTX_NEAR(umtx[0].get(), ures[0].get(), residual); + GKO_ASSERT_MTX_NEAR(umtx[1].get(), ures[1].get(), residual); +} + + +TYPED_TEST(BatchDense, MovesToPrecision) +{ + using BatchDense = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using OtherT = typename gko::next_precision; + using OtherBatchDense = typename gko::matrix::BatchDense; + auto tmp = OtherBatchDense::create(this->exec); + auto res = BatchDense::create(this->exec); + // If OtherT is more precise: 0, otherwise r + auto residual = r::value < r::value + ? 
gko::remove_complex{0} + : gko::remove_complex{r::value}; + + this->mtx_1->move_to(tmp.get()); + tmp->move_to(res.get()); + + auto ures = res->unbatch(); + auto umtx = this->mtx_1->unbatch(); + GKO_ASSERT_MTX_NEAR(umtx[0].get(), ures[0].get(), residual); + GKO_ASSERT_MTX_NEAR(umtx[1].get(), ures[1].get(), residual); +} + + +TYPED_TEST(BatchDense, ConvertsToCsr32) +{ + using T = typename TestFixture::value_type; + using BatchCsr = typename gko::matrix::BatchCsr; + auto batch_csr_mtx = BatchCsr::create(this->mtx_6->get_executor()); + + this->mtx_6->convert_to(batch_csr_mtx.get()); + + auto v = batch_csr_mtx->get_const_values(); + auto c = batch_csr_mtx->get_const_col_idxs(); + auto r = batch_csr_mtx->get_const_row_ptrs(); + ASSERT_EQ(batch_csr_mtx->get_num_batch_entries(), 2); + ASSERT_EQ(batch_csr_mtx->get_size().at(0), gko::dim<2>(3, 3)); + ASSERT_EQ(batch_csr_mtx->get_size().at(1), gko::dim<2>(3, 3)); + ASSERT_EQ(batch_csr_mtx->get_num_stored_elements(), 10); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(r[1], 2); + EXPECT_EQ(r[2], 3); + EXPECT_EQ(r[3], 5); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 2); + EXPECT_EQ(c[2], 1); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(c[4], 2); + EXPECT_EQ(v[0], T{1.0}); + EXPECT_EQ(v[1], T{3.0}); + EXPECT_EQ(v[2], T{3.0}); + EXPECT_EQ(v[3], T{1.0}); + EXPECT_EQ(v[4], T{5.0}); + EXPECT_EQ(v[5], T{2.0}); + EXPECT_EQ(v[6], T{5.0}); + EXPECT_EQ(v[7], T{1.0}); + EXPECT_EQ(v[8], T{-1.0}); + EXPECT_EQ(v[9], T{8.0}); +} + + +TYPED_TEST(BatchDense, MovesToCsr32) +{ + using T = typename TestFixture::value_type; + using BatchCsr = typename gko::matrix::BatchCsr; + auto batch_csr_mtx = BatchCsr::create(this->mtx_6->get_executor()); + + this->mtx_6->move_to(batch_csr_mtx.get()); + + auto v = batch_csr_mtx->get_const_values(); + auto c = batch_csr_mtx->get_const_col_idxs(); + auto r = batch_csr_mtx->get_const_row_ptrs(); + ASSERT_EQ(batch_csr_mtx->get_num_batch_entries(), 2); + ASSERT_EQ(batch_csr_mtx->get_size().at(0), gko::dim<2>(3, 3)); + ASSERT_EQ(batch_csr_mtx->get_size().at(1), gko::dim<2>(3, 3)); + ASSERT_EQ(batch_csr_mtx->get_num_stored_elements(), 10); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(r[1], 2); + EXPECT_EQ(r[2], 3); + EXPECT_EQ(r[3], 5); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 2); + EXPECT_EQ(c[2], 1); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(c[4], 2); + EXPECT_EQ(v[0], T{1.0}); + EXPECT_EQ(v[1], T{3.0}); + EXPECT_EQ(v[2], T{3.0}); + EXPECT_EQ(v[3], T{1.0}); + EXPECT_EQ(v[4], T{5.0}); + EXPECT_EQ(v[5], T{2.0}); + EXPECT_EQ(v[6], T{5.0}); + EXPECT_EQ(v[7], T{1.0}); + EXPECT_EQ(v[8], T{-1.0}); + EXPECT_EQ(v[9], T{8.0}); +} + + +TYPED_TEST(BatchDense, ConvertsEmptyToPrecision) +{ + using BatchDense = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using OtherT = typename gko::next_precision; + using OtherBatchDense = typename gko::matrix::BatchDense; + auto empty = OtherBatchDense::create(this->exec); + auto res = BatchDense::create(this->exec); + + empty->convert_to(res.get()); + + ASSERT_FALSE(res->get_num_batch_entries()); +} + + +TYPED_TEST(BatchDense, MovesEmptyToPrecision) +{ + using BatchDense = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using OtherT = typename gko::next_precision; + using OtherBatchDense = typename gko::matrix::BatchDense; + auto empty = OtherBatchDense::create(this->exec); + auto res = BatchDense::create(this->exec); + + empty->move_to(res.get()); + + ASSERT_FALSE(res->get_num_batch_entries()); +} + + +TYPED_TEST(BatchDense, ConvertsEmptyMatrixToCsr) +{ + using BatchDense = typename TestFixture::Mtx; + using 
T = typename TestFixture::value_type; + using BatchCsr = typename gko::matrix::BatchCsr; + auto empty = BatchDense::create(this->exec); + auto res = BatchCsr::create(this->exec); + + empty->convert_to(res.get()); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); + ASSERT_FALSE(res->get_num_batch_entries()); +} + + +TYPED_TEST(BatchDense, MovesEmptyMatrixToCsr) +{ + using BatchDense = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using BatchCsr = typename gko::matrix::BatchCsr; + auto empty = BatchDense::create(this->exec); + auto res = BatchCsr::create(this->exec); + + empty->move_to(res.get()); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); + ASSERT_FALSE(res->get_num_batch_entries()); +} + + +TYPED_TEST(BatchDense, ConvertsToBatchDiagonal) +{ + using BDense = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using BDiag = gko::matrix::BatchDiagonal; + auto vec = gko::batch_initialize( + {I({2.0, 3.0, -1.0}), I({1.0, -2.0, 8.0})}, this->exec); + auto diag = BDiag::create(this->exec); + + vec->convert_to(diag.get()); + + auto check_sz = gko::batch_dim<2>{2, gko::dim<2>{3}}; + ASSERT_EQ(diag->get_size(), check_sz); + auto diag_vals = diag->get_const_values(); + ASSERT_EQ(diag_vals[0], T{2.0}); + ASSERT_EQ(diag_vals[1], T{3.0}); + ASSERT_EQ(diag_vals[2], T{-1.0}); + ASSERT_EQ(diag_vals[3], T{1.0}); + ASSERT_EQ(diag_vals[4], T{-2.0}); + ASSERT_EQ(diag_vals[5], T{8.0}); +} + + +TYPED_TEST(BatchDense, MovesToBatchDiagonal) +{ + using BDense = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using BDiag = gko::matrix::BatchDiagonal; + auto vec = gko::batch_initialize( + {I({2.0, 3.0, -1.0}), I({1.0, -2.0, 8.0})}, this->exec); + auto vec_ptr = vec->get_const_values(); + auto diag = BDiag::create(this->exec); + + vec->move_to(diag.get()); + + auto check_sz = gko::batch_dim<2>{2, gko::dim<2>{3}}; + ASSERT_EQ(diag->get_size(), check_sz); + auto diag_vals = diag->get_const_values(); + ASSERT_EQ(diag_vals, vec_ptr); + ASSERT_NE(diag_vals, vec->get_const_values()); + ASSERT_EQ(vec->get_num_batch_entries(), 0); +} + + +TYPED_TEST(BatchDense, SquareMatrixIsTransposable) +{ + using Mtx = typename TestFixture::Mtx; + auto trans = this->mtx_4->transpose(); + auto trans_as_batch_dense = static_cast(trans.get()); + + auto utb = trans_as_batch_dense->unbatch(); + GKO_ASSERT_MTX_NEAR(utb[0].get(), + l({{1.0, 6.0, 6.0}, {1.5, 1.0, 1.0}, {3.0, 5.0, 5.5}}), + r::value); + GKO_ASSERT_MTX_NEAR( + utb[1].get(), l({{2.0, 4.0, -1.25}, {-2.0, 3.0, 3.0}, {1.5, 2.2, 0.5}}), + r::value); +} + + +TYPED_TEST(BatchDense, NonSquareMatrixIsTransposable) +{ + using Mtx = typename TestFixture::Mtx; + auto trans = this->mtx_5->transpose(); + auto trans_as_batch_dense = static_cast(trans.get()); + + auto utb = trans_as_batch_dense->unbatch(); + GKO_ASSERT_MTX_NEAR(utb[0].get(), l({{1.0, 6.0, 7.0}, {1.5, 1.0, -4.5}}), + r::value); + GKO_ASSERT_MTX_NEAR(utb[1].get(), l({{2.0, 1.0, 4.0}, {-2.0, 3.0, 3.0}}), + r::value); +} + + +TYPED_TEST(BatchDense, SquareMatrixAddScaledIdentity) +{ + using T = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + auto mtx = gko::batch_initialize( + {{I({1.0, -1.0, 1.5}), I({-2.0, 0.0, 3.0}), + I({1.2, -0.5, 1.0})}, + {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}, {3.0, 0.0, -1.5}}}, + this->exec); + auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); + auto beta = gko::batch_initialize({{3.0}, {-1.0}}, 
this->exec); + auto sol_mtx = gko::batch_initialize( + {{I({5.0, -3.0, 4.5}), I({-6.0, 2.0, 9.0}), + I({3.6, -1.5, 5.0})}, + {{-3.0, 2.0, 0.5}, {-1.0, 0.5, -4.0}, {-3.0, 0.0, -0.5}}}, + this->exec); + + mtx->add_scaled_identity(alpha.get(), beta.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(mtx, sol_mtx, r::value); +} + + +} // namespace diff --git a/test/matrix/batch_vector_kernels.cpp b/test/matrix/batch_vector_kernels.cpp new file mode 100644 index 00000000000..5d275dbea5b --- /dev/null +++ b/test/matrix/batch_vector_kernels.cpp @@ -0,0 +1,433 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/test/utils.hpp" +#include "core/test/utils/batch.hpp" +#include "test/utils/executor.hpp" + + +#ifndef GKO_COMPILING_DPCPP + + +class BatchDense : public CommonTestFixture { +protected: + using vtype = double; + using Mtx = gko::matrix::BatchDense; + using NormVector = gko::matrix::BatchDense>; + using ComplexMtx = gko::matrix::BatchDense>; + + BatchDense() : rand_engine(15) {} + + template + std::unique_ptr gen_mtx(const size_t batchsize, int num_rows, + int num_cols) + { + return gko::test::generate_uniform_batch_random_matrix( + batchsize, num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, false, ref); + } + + void set_up_vector_data(gko::size_type num_vecs, + bool different_alpha = false) + { + const int num_rows = 252; + x = gen_mtx(batch_size, num_rows, num_vecs); + y = gen_mtx(batch_size, num_rows, num_vecs); + if (different_alpha) { + alpha = gen_mtx(batch_size, 1, num_vecs); + beta = gen_mtx(batch_size, 1, num_vecs); + } else { + alpha = gko::batch_initialize(batch_size, {2.0}, ref); + beta = gko::batch_initialize(batch_size, {-0.5}, ref); + } + dx = Mtx::create(exec); + dx->copy_from(x.get()); + dy = Mtx::create(exec); + dy->copy_from(y.get()); + dalpha = Mtx::create(exec); + dalpha->copy_from(alpha.get()); + dbeta = gko::clone(exec, beta.get()); + expected = Mtx::create( + ref, gko::batch_dim<>(batch_size, gko::dim<2>{1, num_vecs})); + dresult = Mtx::create( + exec, gko::batch_dim<>(batch_size, gko::dim<2>{1, num_vecs})); + } + + void set_up_apply_data(const int p = 1) + { + const int m = 35, n = 15; + x = gen_mtx(batch_size, m, n); + c_x = gen_mtx(batch_size, m, n); + y = gen_mtx(batch_size, n, p); + expected = gen_mtx(batch_size, m, p); + alpha = gko::batch_initialize(batch_size, {2.0}, ref); + beta = gko::batch_initialize(batch_size, {-1.0}, ref); + square = gen_mtx(batch_size, x->get_size().at()[0], + x->get_size().at()[0]); + dx = Mtx::create(exec); + dx->copy_from(x.get()); + dc_x = ComplexMtx::create(exec); + dc_x->copy_from(c_x.get()); + dy = Mtx::create(exec); + dy->copy_from(y.get()); + dresult = Mtx::create(exec); + dresult->copy_from(expected.get()); + dalpha = Mtx::create(exec); + dalpha->copy_from(alpha.get()); + dbeta = Mtx::create(exec); + dbeta->copy_from(beta.get()); + dsquare = Mtx::create(exec); + dsquare->copy_from(square.get()); + } + + std::ranlux48 rand_engine; + + const size_t batch_size = 11; + std::unique_ptr x; + std::unique_ptr c_x; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr square; + std::unique_ptr dresult; + std::unique_ptr dx; + std::unique_ptr dc_x; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; + std::unique_ptr dsquare; +}; + + +TEST_F(BatchDense, SingleVectorAppyIsEquivalentToRef) +{ + set_up_apply_data(1); + + x->apply(y.get(), expected.get()); + dx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(BatchDense, SingleVectorAdvancedAppyIsEquivalentToRef) +{ + set_up_apply_data(1); + + x->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(BatchDense, 
SingleVectorAddScaledIsEquivalentToRef) +{ + set_up_vector_data(1); + + x->add_scaled(alpha.get(), y.get()); + dx->add_scaled(dalpha.get(), dy.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); +} + + +TEST_F(BatchDense, SingleVectorAddScaleIsEquivalentToRef) +{ + set_up_vector_data(1); + + x->add_scale(alpha.get(), y.get(), beta.get()); + dx->add_scale(dalpha.get(), dy.get(), dbeta.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); +} + + +TEST_F(BatchDense, MultipleVectorAddScaledIsEquivalentToRef) +{ + set_up_vector_data(20); + + x->add_scaled(alpha.get(), y.get()); + dx->add_scaled(dalpha.get(), dy.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); +} + + +TEST_F(BatchDense, MultipleVectorAddScaleIsEquivalentToRef) +{ + set_up_vector_data(20); + + x->add_scale(alpha.get(), y.get(), beta.get()); + dx->add_scale(dalpha.get(), dy.get(), dbeta.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); +} + + +TEST_F(BatchDense, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) +{ + set_up_vector_data(20, true); + + x->add_scaled(alpha.get(), y.get()); + dx->add_scaled(dalpha.get(), dy.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); +} + + +TEST_F(BatchDense, MultipleVectorAddScaleWithDifferentScalarsIsEquivalentToRef) +{ + set_up_vector_data(20, true); + + x->add_scale(alpha.get(), y.get(), beta.get()); + dx->add_scale(dalpha.get(), dy.get(), dbeta.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); +} + + +TEST_F(BatchDense, SingleVectorScaleIsEquivalentToRef) +{ + set_up_vector_data(1); + + x->scale(alpha.get()); + dx->scale(dalpha.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); +} + + +TEST_F(BatchDense, MultipleVectorScaleIsEquivalentToRef) +{ + set_up_vector_data(20); + + x->scale(alpha.get()); + dx->scale(dalpha.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); +} + + +TEST_F(BatchDense, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) +{ + set_up_vector_data(20, true); + + x->scale(alpha.get()); + dx->scale(dalpha.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); +} + + +TEST_F(BatchDense, ComputeNorm2SingleIsEquivalentToRef) +{ + set_up_vector_data(1); + auto norm_size = + gko::batch_dim<>(batch_size, gko::dim<2>{1, x->get_size().at()[1]}); + auto norm_expected = NormVector::create(this->ref, norm_size); + auto dnorm = NormVector::create(this->exec, norm_size); + + x->compute_norm2(norm_expected.get()); + dx->compute_norm2(dnorm.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(norm_expected, dnorm, 1e-14); +} + + +TEST_F(BatchDense, ComputeNorm2IsEquivalentToRef) +{ + set_up_vector_data(20); + auto norm_size = + gko::batch_dim<>(batch_size, gko::dim<2>{1, x->get_size().at()[1]}); + auto norm_expected = NormVector::create(this->ref, norm_size); + auto dnorm = NormVector::create(this->exec, norm_size); + + x->compute_norm2(norm_expected.get()); + dx->compute_norm2(dnorm.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(norm_expected, dnorm, 1e-14); +} + + +TEST_F(BatchDense, ComputeDotIsEquivalentToRef) +{ + set_up_vector_data(20); + auto dot_size = + gko::batch_dim<>(batch_size, gko::dim<2>{1, x->get_size().at()[1]}); + auto dot_expected = Mtx::create(this->ref, dot_size); + auto ddot = Mtx::create(this->exec, dot_size); + + x->compute_dot(y.get(), dot_expected.get()); + dx->compute_dot(dy.get(), ddot.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dot_expected, ddot, 1e-14); +} + + +TEST_F(BatchDense, ComputeDotSingleIsEquivalentToRef) +{ + set_up_vector_data(1); + auto dot_size = + gko::batch_dim<>(batch_size, gko::dim<2>{1, x->get_size().at()[1]}); + auto dot_expected = 
Mtx::create(this->ref, dot_size); + auto ddot = Mtx::create(this->exec, dot_size); + + x->compute_dot(y.get(), dot_expected.get()); + dx->compute_dot(dy.get(), ddot.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dot_expected, ddot, 1e-14); +} + + +TEST_F(BatchDense, CopySingleIsEquivalentToRef) +{ + set_up_vector_data(1); + + gko::kernels::reference::batch_dense::copy(this->ref, x.get(), y.get()); + gko::kernels::EXEC_NAMESPACE::batch_dense::copy(this->exec, dx.get(), + dy.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dy, y, 0.0); +} + + +TEST_F(BatchDense, CopyIsEquivalentToRef) +{ + set_up_vector_data(20); + + gko::kernels::reference::batch_dense::copy(this->ref, x.get(), y.get()); + gko::kernels::EXEC_NAMESPACE::batch_dense::copy(this->exec, dx.get(), + dy.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dy, y, 0.0); +} + + +TEST_F(BatchDense, BatchScaleIsEquivalentToRef) +{ + using BDiag = gko::matrix::BatchDiagonal; + const int num_rhs = 20; + set_up_vector_data(num_rhs); + + const int num_rows_in_mat = x->get_size().at(0)[0]; + const auto left = + gen_mtx(batch_size, num_rows_in_mat, num_rows_in_mat); + const auto rght = gen_mtx(batch_size, num_rhs, num_rhs); + auto dleft = BDiag::create(this->exec); + dleft->copy_from(left.get()); + auto drght = BDiag::create(this->exec); + drght->copy_from(rght.get()); + + gko::kernels::reference::batch_dense::batch_scale(this->ref, left.get(), + rght.get(), x.get()); + gko::kernels::EXEC_NAMESPACE::batch_dense::batch_scale( + this->exec, dleft.get(), drght.get(), dx.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); +} + + +TEST_F(BatchDense, TransposeIsEquivalentToRef) +{ + const int nrows = 11; + const int ncols = 6; + const size_t nbatch = 5; + const auto orig = gen_mtx(nbatch, nrows, ncols); + auto corig = Mtx::create(exec); + corig->copy_from(orig.get()); + + auto trans = orig->transpose(); + auto ctrans = corig->transpose(); + + auto dtrans = static_cast(trans.get()); + auto dctrans = static_cast(ctrans.get()); + GKO_ASSERT_BATCH_MTX_NEAR(dtrans, dctrans, 0.0); +} + + +TEST_F(BatchDense, ConjugateTransposeIsEquivalentToRef) +{ + const int nrows = 11; + const int ncols = 6; + const size_t nbatch = 5; + const auto orig = gen_mtx(nbatch, nrows, ncols); + auto corig = Mtx::create(exec); + corig->copy_from(orig.get()); + + auto trans = orig->conj_transpose(); + auto ctrans = corig->conj_transpose(); + + auto dtrans = static_cast(trans.get()); + auto dctrans = static_cast(ctrans.get()); + GKO_ASSERT_BATCH_MTX_NEAR(dtrans, dctrans, 0.0); +} + + +TEST_F(BatchDense, AddScaledIdentityNonSquareIsEquivalentToReference) +{ + set_up_apply_data(); + const gko::size_type batchsize = 10; + const gko::size_type num_rows = 62; + const gko::size_type num_cols = 51; + auto rmtx = gko::test::generate_uniform_batch_random_matrix( + batchsize, num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, true, ref); + auto dmtx = Mtx::create(exec); + dmtx->copy_from(rmtx.get()); + + rmtx->add_scaled_identity(alpha.get(), beta.get()); + dmtx->add_scaled_identity(dalpha.get(), dbeta.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(rmtx, dmtx, 1e-15) +} + + +#endif From f74a8b90986590153f82bfe8fd6ac8c348b4f586 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 30 Jun 2023 18:13:59 +0200 Subject: [PATCH 105/583] Move batch_dim to separate class and simplify --- core/test/base/batch_dim.cpp | 92 ++++++++++++ include/ginkgo/core/base/batch_dim.hpp | 152 ++++++++++++++++++++ include/ginkgo/core/base/dim.hpp | 186 ------------------------- 
3 files changed, 244 insertions(+), 186 deletions(-) create mode 100644 core/test/base/batch_dim.cpp create mode 100644 include/ginkgo/core/base/batch_dim.hpp diff --git a/core/test/base/batch_dim.cpp b/core/test/base/batch_dim.cpp new file mode 100644 index 00000000000..f4361195d7c --- /dev/null +++ b/core/test/base/batch_dim.cpp @@ -0,0 +1,92 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +TEST(BatchDim, ConstructsCorrectUniformObject) +{ + gko::batch_dim<2> d{4, gko::dim<2>(5)}; + + ASSERT_EQ(d.get_num_batch_entries(), 4); + ASSERT_EQ(d.get_common_size(), gko::dim<2>(5)); +} + + +TEST(BatchDim, ConstructsNullObject) +{ + gko::batch_dim<2> d{}; + + ASSERT_EQ(d.get_num_batch_entries(), 0); + ASSERT_EQ(d.get_common_size(), gko::dim<2>{}); +} + + +TEST(BatchDim, EqualityReturnsTrueWhenEqual) +{ + ASSERT_TRUE(gko::batch_dim<2>(2, gko::dim<2>{3}) == + gko::batch_dim<2>(2, gko::dim<2>{3})); +} + + +TEST(BatchDim, EqualityReturnsFalseWhenDifferentNumBatches) +{ + ASSERT_FALSE(gko::batch_dim<2>(3, gko::dim<2>{3}) == + gko::batch_dim<2>(2, gko::dim<2>{3})); +} + + +TEST(BatchDim, EqualityReturnsFalseWhenDifferentBatchSizes) +{ + ASSERT_FALSE(gko::batch_dim<2>(3, gko::dim<2>{3}) == + gko::batch_dim<2>(3, gko::dim<2>{4})); +} + + +TEST(BatchDim, NotEqualWorks) +{ + ASSERT_TRUE(gko::batch_dim<2>(3, gko::dim<2>{3}) != + gko::batch_dim<2>(3, gko::dim<2>{4})); +} + + +TEST(BatchDim, TransposesBatchDimensions) +{ + ASSERT_EQ(gko::transpose(gko::batch_dim<2>(2, gko::dim<2>{4, 2})), + gko::batch_dim<2>(2, gko::dim<2>{2, 4})); +} diff --git a/include/ginkgo/core/base/batch_dim.hpp b/include/ginkgo/core/base/batch_dim.hpp new file mode 100644 index 00000000000..211225d7df2 --- /dev/null +++ b/include/ginkgo/core/base/batch_dim.hpp @@ -0,0 +1,152 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. 
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#ifndef GKO_PUBLIC_CORE_BASE_BATCH_DIM_HPP_
+#define GKO_PUBLIC_CORE_BASE_BATCH_DIM_HPP_
+
+
+#include
+
+
+#include
+#include
+
+
+namespace gko {
+
+
+/**
+ * A type representing the dimensions of a multidimensional batch object.
+ *
+ * @tparam Dimensionality number of dimensions of the object
+ * @tparam DimensionType datatype used to represent each dimension
+ *
+ * @ingroup batch_dim
+ */
+template <size_type Dimensionality = 2, typename DimensionType = size_type>
+struct batch_dim {
+    static constexpr size_type dimensionality = Dimensionality;
+    using dimension_type = DimensionType;
+
+    /**
+     * Get the number of batch entries stored
+     *
+     * @return num_batch_entries
+     */
+    size_type get_num_batch_entries() const { return num_batch_entries_; }
+
+    /**
+     * Get the common size of the batches
+     *
+     * @return common_size
+     */
+    dim<Dimensionality, DimensionType> get_common_size() const
+    {
+        return common_size_;
+    }
+
+    /**
+     * Checks if two batch_dim objects are equal.
+     *
+     * @param x first object
+     * @param y second object
+     *
+     * @return true if and only if all dimensions of both objects are equal.
+     */
+    friend bool operator==(const batch_dim& x, const batch_dim& y)
+    {
+        return x.num_batch_entries_ == y.num_batch_entries_ &&
+               x.common_size_ == y.common_size_;
+    }
+
+    /**
+     * Creates a batch_dim object which stores a uniform size for all batch
+     * entries.
+     *
+     * @param num_batch_entries number of batch entries to be stored
+     * @param common_size the common size of all the batch entries stored
+     *
+     * @note Use this constructor when uniform batches need to be stored.
+     */
+    explicit batch_dim(const size_type num_batch_entries = 0,
+                       const dim<Dimensionality, DimensionType>& common_size =
+                           dim<Dimensionality, DimensionType>{})
+        : common_size_(common_size), num_batch_entries_(num_batch_entries)
+    {}
+
+private:
+    size_type num_batch_entries_{};
+    dim<Dimensionality, DimensionType> common_size_{};
+};
+
+
+/**
+ * Checks if two batch dim objects are different.
+ * + * @tparam Dimensionality number of dimensions of the dim objects + * @tparam DimensionType datatype used to represent each dimension + * + * @param x first object + * @param y second object + * + * @return `!(x == y)` + */ +template +inline bool operator!=(const batch_dim& x, + const batch_dim& y) +{ + return !(x == y); +} + + +/** + * Returns a batch_dim object with its dimensions swapped for batched operators + * + * @tparam DimensionType datatype used to represent each dimension + * + * @param dimensions original object + * + * @return a batch_dim object with dimensions swapped + */ +template +inline batch_dim<2, DimensionType> transpose( + const batch_dim<2, DimensionType>& input) +{ + return batch_dim<2, DimensionType>(input.get_num_batch_entries(), + gko::transpose(input.get_common_size())); +} + + +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_BATCH_DIM_HPP_ diff --git a/include/ginkgo/core/base/dim.hpp b/include/ginkgo/core/base/dim.hpp index ae13290cdd2..c70c5f054ec 100644 --- a/include/ginkgo/core/base/dim.hpp +++ b/include/ginkgo/core/base/dim.hpp @@ -243,144 +243,6 @@ struct dim<1u, DimensionType> { }; -/** - * A type representing the dimensions of a multidimensional batch object. - * - * @tparam Dimensionality number of dimensions of the object - * @tparam DimensionType datatype used to represent each dimension - * - * @ingroup batch_dim - */ -template -struct batch_dim { - static constexpr size_type dimensionality = Dimensionality; - using dimension_type = DimensionType; - - /** - * Checks if the batch_dim object stores equal sizes. - * - * @return bool representing whether equal sizes are being stored - */ - bool stores_equal_sizes() const { return equal_sizes_; } - - /** - * Get the number of batch entries stored - * - * @return num_batch_entries - */ - size_type get_num_batch_entries() const { return num_batch_entries_; } - - /** - * Get the sizes of all entries as a std::vector. - * - * @return the std::vector of batch sizes - */ - std::vector> get_batch_sizes() const - { - if (equal_sizes_) { - if (num_batch_entries_ > 0) { - return std::vector>( - num_batch_entries_, common_size_); - } else { - return std::vector>{ - common_size_}; - } - } else { - return sizes_; - } - } - - /** - * Get the batch size at a particular index. - * - * @param batch_entry the index of the entry whose size is needed - * - * @return the size of the batch entry at the requested batch-index - */ - const dim& at( - const size_type batch_entry = 0) const - { - if (equal_sizes_) { - return common_size_; - } else { - GKO_ASSERT(batch_entry < num_batch_entries_); - return sizes_[batch_entry]; - } - } - - /** - * Checks if two batch_dim objects are equal. - * - * @param x first object - * @param y second object - * - * @return true if and only if all dimensions of both objects are equal. - */ - friend bool operator==(const batch_dim& x, const batch_dim& y) - { - if (x.equal_sizes_ && y.equal_sizes_) { - return x.num_batch_entries_ == y.num_batch_entries_ && - x.common_size_ == y.common_size_; - } else { - return x.sizes_ == y.sizes_; - } - } - - /** - * Creates a batch_dim object which stores a uniform size for all batch - * entries. - * - * @param num_batch_entries number of batch entries to be stored - * @param common_size the common size of all the batch entries stored - * - * @note Use this constructor when uniform batches need to be stored. 
- */ - explicit batch_dim(const size_type num_batch_entries = 0, - const dim& common_size = - dim{}) - : equal_sizes_(true), - common_size_(common_size), - num_batch_entries_(num_batch_entries), - sizes_() - {} - - /** - * Creates a batch_dim object which stores possibly non-uniform sizes for - * the different batch entries. - * - * @param batch_sizes the std::vector object that stores the batch_sizes - * - * @note Use this constructor when non-uniform batches need to be stored. - */ - batch_dim( - const std::vector>& batch_sizes) - : equal_sizes_(false), - common_size_(dim{}), - num_batch_entries_(batch_sizes.size()), - sizes_(batch_sizes) - { - check_size_equality(); - } - -private: - void check_size_equality() - { - for (size_type i = 0; i < num_batch_entries_; ++i) { - if (!(sizes_[i] == sizes_[0])) { - return; - } - } - common_size_ = sizes_[0]; - equal_sizes_ = true; - } - - bool equal_sizes_{}; - size_type num_batch_entries_{}; - dim common_size_{}; - std::vector> sizes_{}; -}; - - /** * Checks if two dim objects are different. * @@ -418,54 +280,6 @@ constexpr GKO_ATTRIBUTES GKO_INLINE dim<2, DimensionType> transpose( } -/** - * Checks if two batch dim objects are different. - * - * @tparam Dimensionality number of dimensions of the dim objects - * @tparam DimensionType datatype used to represent each dimension - * - * @param x first object - * @param y second object - * - * @return `!(x == y)` - */ -template -inline bool operator!=(const batch_dim& x, - const batch_dim& y) -{ - return !(x == y); -} - - -/** - * Returns a batch_dim object with its dimensions swapped for batched operators - * - * @tparam DimensionType datatype used to represent each dimension - * - * @param dimensions original object - * - * @return a batch_dim object with the individual batches having their - * dimensions swapped - */ -template -inline batch_dim<2, DimensionType> transpose( - const batch_dim<2, DimensionType>& input) -{ - batch_dim<2, DimensionType> out{}; - if (input.stores_equal_sizes()) { - out = batch_dim<2, DimensionType>(input.get_num_batch_entries(), - gko::transpose(input.at(0))); - return out; - } - auto trans = - std::vector>(input.get_num_batch_entries()); - for (size_type i = 0; i < trans.size(); ++i) { - trans[i] = transpose(input.at(i)); - } - return batch_dim<2, DimensionType>(trans); -} - - } // namespace gko From bf211ef2fa7a08c3fe10de65770e40d527ac69a8 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 3 Jul 2023 14:27:28 +0200 Subject: [PATCH 106/583] WIP batch_vector updates --- core/device_hooks/common_kernels.inc.cpp | 13 + core/matrix/batch_struct.hpp | 143 +++++ core/matrix/batch_vector.cpp | 287 ++-------- core/matrix/batch_vector_kernels.hpp | 251 ++------- core/test/matrix/CMakeLists.txt | 1 + .../{batch_dense.cpp => batch_vector.cpp} | 110 ++-- cuda/matrix/batch_struct.hpp | 118 +++++ cuda/matrix/batch_vector_kernels.cu | 303 +---------- hip/matrix/batch_struct.hip.hpp | 120 +++++ hip/matrix/batch_vector_kernels.hip.cpp | 306 +---------- include/ginkgo/core/matrix/batch_vector.hpp | 297 +++++------ omp/matrix/batch_vector_kernels.cpp | 497 +----------------- reference/matrix/batch_struct.hpp | 120 +++++ reference/matrix/batch_vector_kernels.cpp | 476 +---------------- reference/matrix/batch_vector_kernels.hpp.inc | 70 +-- .../test/matrix/batch_vector_kernels.cpp | 164 +++--- test/matrix/batch_vector_kernels.cpp | 68 +-- 17 files changed, 1012 insertions(+), 2332 deletions(-) create mode 100644 core/matrix/batch_struct.hpp rename core/test/matrix/{batch_dense.cpp => 
batch_vector.cpp} (84%) create mode 100644 cuda/matrix/batch_struct.hpp create mode 100644 hip/matrix/batch_struct.hip.hpp create mode 100644 reference/matrix/batch_struct.hpp diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index f1af9318f9f..a5aa43100a3 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -272,6 +272,19 @@ GKO_STUB_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(GKO_DECLARE_BUILD_LOCAL_NONLOCAL); } // namespace distributed_matrix +namespace batch_vector { + + +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL); + + +} // namespace batch_vector + + namespace dense { diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp new file mode 100644 index 00000000000..01092f0e4d0 --- /dev/null +++ b/core/matrix/batch_struct.hpp @@ -0,0 +1,143 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include +#include + + +namespace gko { +namespace batch_vector { + + +/** + * Encapsulates one matrix from a batch of dense matrices (vectors). + */ +template +struct BatchEntry { + using value_type = ValueType; + ValueType* values; + size_type stride; + int num_rows; + int num_rhs; +}; + +/** + * A 'simple' structure to store a global uniform batch of dense matrices. + * + * It is uniform in the sense that all matrices in the batch have common sizes. 
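+ *
+ * As an illustrative sketch, using only the members and helpers defined in
+ * this file: a uniform batch b holding 10 vectors of size 32x1 would have
+ * num_batch = 10, num_rows = 32, num_rhs = 1 and stride >= num_rhs, and a
+ * view of the k-th entry can be obtained with
+ *
+ *     auto entry = gko::batch::batch_entry(b, k);
+ *     // entry.values == b.values + k * b.stride * b.num_rows
+ *
+ * which is exactly what the batch_entry() helper further down in this file
+ * computes.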
+ */ +template +struct UniformBatch { + using value_type = ValueType; + using entry_type = BatchEntry; + + ValueType* values; ///< Concatenated values of all matrices in the batch + size_type num_batch; ///< Number of matrices in the batch + size_type stride; ///< Common stride of each dense matrix + int num_rows; ///< Common number of rows in each matrix + int num_rhs; ///< Common number of columns of each matrix + int num_nnz; ///< Common number of non-zeros of each matrix, ie., + ///< the number or rows times the number of columns + + size_type get_entry_storage() const { return num_nnz * sizeof(value_type); } +}; + + +} // namespace batch_vector + + +namespace batch { + + +template +GKO_ATTRIBUTES GKO_INLINE gko::batch_vector::BatchEntry +to_const(const gko::batch_vector::BatchEntry& b) +{ + return {b.values, b.stride, b.num_rows, b.num_rhs}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE gko::batch_vector::UniformBatch +to_const(const gko::batch_vector::UniformBatch& ub) +{ + return {ub.values, ub.num_batch, ub.stride, ub.num_rows, ub.num_rhs}; +} + + +/** + * Extract one object (matrix, vector etc.) from a batch of objects + * + * This overload is for batch dense matrices. + * These overloads are intended to be called from within a kernel. + * + * @param batch The batch of objects to extract from + * @param batch_idx The position of the desired object in the batch + */ +template +GKO_ATTRIBUTES GKO_INLINE batch_vector::BatchEntry batch_entry( + const batch_vector::UniformBatch& batch, + const size_type batch_idx) +{ + return {batch.values + batch_idx * batch.stride * batch.num_rows, + batch.stride, batch.num_rows, batch.num_rhs}; +} + +template +GKO_ATTRIBUTES GKO_INLINE batch_vector::BatchEntry batch_entry( + ValueType* const batch_values, const size_type stride, const int num_rows, + const int num_rhs, const size_type batch_idx) +{ + return {batch_values + batch_idx * stride * num_rows, stride, num_rows, + num_rhs}; +} + +template +GKO_ATTRIBUTES GKO_INLINE ValueType* batch_entry_ptr( + ValueType* const batch_start, const size_type stride, const int num_rows, + const size_type batch_idx) +{ + return batch_start + batch_idx * stride * num_rows; +} + + +} // namespace batch + + +} // namespace gko + +#endif // GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ diff --git a/core/matrix/batch_vector.cpp b/core/matrix/batch_vector.cpp index 4449516d5a1..abacd9b1cd8 100644 --- a/core/matrix/batch_vector.cpp +++ b/core/matrix/batch_vector.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include #include @@ -43,93 +43,30 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include -#include -#include -#include -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_vector_kernels.hpp" namespace gko { namespace matrix { -namespace batch_dense { - - -GKO_REGISTER_OPERATION(simple_apply, batch_dense::simple_apply); -GKO_REGISTER_OPERATION(apply, batch_dense::apply); -GKO_REGISTER_OPERATION(scale, batch_dense::scale); -GKO_REGISTER_OPERATION(add_scaled, batch_dense::add_scaled); -GKO_REGISTER_OPERATION(add_scale, batch_dense::add_scale); -GKO_REGISTER_OPERATION(convergence_add_scaled, - batch_dense::convergence_add_scaled); -GKO_REGISTER_OPERATION(add_scaled_diag, batch_dense::add_scaled_diag); -GKO_REGISTER_OPERATION(compute_dot, batch_dense::compute_dot); -GKO_REGISTER_OPERATION(convergence_compute_dot, - batch_dense::convergence_compute_dot); -GKO_REGISTER_OPERATION(compute_norm2, batch_dense::compute_norm2); -GKO_REGISTER_OPERATION(convergence_compute_norm2, - batch_dense::convergence_compute_norm2); -GKO_REGISTER_OPERATION(copy, batch_dense::copy); -GKO_REGISTER_OPERATION(convergence_copy, batch_dense::convergence_copy); -GKO_REGISTER_OPERATION(convert_to_batch_csr, batch_dense::convert_to_batch_csr); -GKO_REGISTER_OPERATION(count_nonzeros, batch_dense::count_nonzeros); -GKO_REGISTER_OPERATION(calculate_max_nnz_per_row, - batch_dense::calculate_max_nnz_per_row); -GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, - batch_dense::calculate_nonzeros_per_row); -GKO_REGISTER_OPERATION(calculate_total_cols, batch_dense::calculate_total_cols); -GKO_REGISTER_OPERATION(transpose, batch_dense::transpose); -GKO_REGISTER_OPERATION(conj_transpose, batch_dense::conj_transpose); -GKO_REGISTER_OPERATION(add_scaled_identity, batch_dense::add_scaled_identity); - - -} // namespace batch_dense +namespace batch_vector { -template -void BatchDense::apply_impl(const BatchLinOp* b, BatchLinOp* x) const -{ - // TODO: Remove this when non-uniform batching kernels have been - // implemented - if (!this->get_size().stores_equal_sizes() || - !this->get_stride().stores_equal_strides()) { - GKO_NOT_IMPLEMENTED; - } - this->get_executor()->run(batch_dense::make_simple_apply( - this, as>(b), as>(x))); -} +GKO_REGISTER_OPERATION(scale, batch_vector::scale); +GKO_REGISTER_OPERATION(add_scaled, batch_vector::add_scaled); +GKO_REGISTER_OPERATION(compute_dot, batch_vector::compute_dot); +GKO_REGISTER_OPERATION(compute_norm2, batch_vector::compute_norm2); +GKO_REGISTER_OPERATION(copy, batch_vector::copy); -template -void BatchDense::apply_impl(const BatchLinOp* alpha, - const BatchLinOp* b, - const BatchLinOp* beta, - BatchLinOp* x) const -{ - if (!this->get_size().stores_equal_sizes() || - !this->get_stride().stores_equal_strides()) { - GKO_NOT_IMPLEMENTED; - } - if (auto bid = dynamic_cast*>(b)) { - if (auto xdense = dynamic_cast*>(x)) { - xdense->add_scale(alpha, this, beta); - } else { - GKO_NOT_SUPPORTED(x); - } - } else { - this->get_executor()->run(batch_dense::make_apply( - as>(alpha), this, - as>(b), as>(beta), - as>(x))); - } -} +} // namespace batch_vector template -void BatchDense::scale_impl(const BatchLinOp* alpha) +void BatchVector::scale_impl(const BatchLinOp* alpha) { - auto batch_alpha = as>(alpha); + auto batch_alpha = as>(alpha); GKO_ASSERT_BATCH_EQUAL_ROWS( batch_alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); for (size_type b = 0; b < batch_alpha->get_num_batch_entries(); ++b) { @@ -139,16 +76,16 @@ void BatchDense::scale_impl(const BatchLinOp* alpha) } } auto exec = this->get_executor(); - 
exec->run(batch_dense::make_scale(batch_alpha, this)); + exec->run(batch_vector::make_scale(batch_alpha, this)); } template -void BatchDense::add_scaled_impl(const BatchLinOp* alpha, - const BatchLinOp* b) +void BatchVector::add_scaled_impl(const BatchLinOp* alpha, + const BatchLinOp* b) { - auto batch_alpha = as>(alpha); - auto batch_b = as>(b); + auto batch_alpha = as>(alpha); + auto batch_b = as>(b); GKO_ASSERT_BATCH_EQUAL_ROWS( batch_alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); for (size_type b = 0; b < batch_alpha->get_num_batch_entries(); ++b) { @@ -160,37 +97,7 @@ void BatchDense::add_scaled_impl(const BatchLinOp* alpha, GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, batch_b); auto exec = this->get_executor(); - exec->run(batch_dense::make_add_scaled(batch_alpha, batch_b, this)); -} - - -template -void BatchDense::add_scale(const BatchLinOp* const alpha, - const BatchLinOp* const a, - const BatchLinOp* const beta) -{ - auto batch_alpha = as>(alpha); - auto batch_beta = as>(beta); - auto batch_a = as>(a); - GKO_ASSERT_BATCH_EQUAL_ROWS( - batch_alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); - if (batch_alpha->get_size().stores_equal_sizes()) { - if (batch_alpha->get_size().at(0)[1] != 1) { - // different alpha for each column - GKO_ASSERT_BATCH_EQUAL_COLS(this, batch_alpha); - } - } else { - for (size_type b = 0; b < batch_alpha->get_num_batch_entries(); ++b) { - if (batch_alpha->get_size().at(b)[1] != 1) { - GKO_ASSERT(this->get_size().at(b)[1] == - batch_alpha->get_size().at(b)[1]); - } - } - } - GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, batch_a); - GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_alpha, batch_beta); - this->get_executor()->run( - batch_dense::make_add_scale(batch_alpha, batch_a, batch_beta, this)); + exec->run(batch_vector::make_add_scaled(batch_alpha, batch_b, this)); } @@ -205,35 +112,35 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) template -void BatchDense::compute_dot_impl(const BatchLinOp* b, - BatchLinOp* result) const +void BatchVector::compute_dot_impl(const BatchLinOp* b, + BatchLinOp* result) const { - auto batch_result = as>(result); - auto batch_b = as>(b); + auto batch_result = as>(result); + auto batch_b = as>(b); GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, batch_b); GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_result, get_col_sizes(this->get_size())); auto exec = this->get_executor(); - exec->run(batch_dense::make_compute_dot(this, batch_b, batch_result)); + exec->run(batch_vector::make_compute_dot(this, batch_b, batch_result)); } template -void BatchDense::compute_norm2_impl(BatchLinOp* result) const +void BatchVector::compute_norm2_impl(BatchLinOp* result) const { - using NormVector = BatchDense>; + using NormVector = BatchVector>; auto batch_result = as(result); GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_result, get_col_sizes(this->get_size())); auto exec = this->get_executor(); - exec->run(batch_dense::make_compute_norm2(as>(this), - batch_result)); + exec->run(batch_vector::make_compute_norm2(as>(this), + batch_result)); } template -void BatchDense::convert_to( - BatchDense>* result) const +void BatchVector::convert_to( + BatchVector>* result) const { result->values_ = this->values_; result->stride_ = this->stride_; @@ -243,94 +150,13 @@ void BatchDense::convert_to( template -void BatchDense::move_to( - BatchDense>* result) +void BatchVector::move_to( + BatchVector>* result) { this->convert_to(result); } -template -void BatchDense::convert_to(BatchCsr* result) const -{ - auto exec = this->get_executor(); - - 
auto batch_size = this->get_size(); - if (!batch_size.stores_equal_sizes()) { - GKO_NOT_IMPLEMENTED; - } - - auto num_stored_nonzeros = - array{exec->get_master(), this->get_num_batch_entries()}; - exec->run( - batch_dense::make_count_nonzeros(this, num_stored_nonzeros.get_data())); - gko::dim<2> main_size = this->get_size().at(0); - const size_type num_nnz = - num_stored_nonzeros.get_data() ? num_stored_nonzeros.get_data()[0] : 0; - auto tmp = BatchCsr::create( - exec, this->get_num_batch_entries(), main_size, num_nnz); - exec->run(batch_dense::make_convert_to_batch_csr(this, tmp.get())); - tmp->move_to(result); -} - - -template -void BatchDense::move_to(BatchCsr* result) -{ - this->convert_to(result); -} - - -template -void BatchDense::convert_to( - BatchDiagonal* const result) const -{ - auto exec = this->get_executor(); - - auto batch_size = this->get_size(); - if (!batch_size.stores_equal_sizes()) { - GKO_NOT_IMPLEMENTED; - } - GKO_ASSERT_BATCH_HAS_SINGLE_COLUMN(this); - if (this->get_stride().at(0) != 1) { - GKO_NOT_IMPLEMENTED; - } - auto temp = BatchDiagonal::create( - exec, batch_dim<2>{batch_size.get_num_batch_entries(), - dim<2>{batch_size.at(0)[0]}}); - exec->copy(this->get_num_stored_elements(), this->get_const_values(), - temp->get_values()); - result->copy_from(temp.get()); -} - - -template -void BatchDense::move_to(BatchDiagonal* const result) -{ - auto exec = this->get_executor(); - - auto batch_size = this->get_size(); - if (!batch_size.stores_equal_sizes()) { - GKO_NOT_IMPLEMENTED; - } - GKO_ASSERT_BATCH_HAS_SINGLE_COLUMN(this); - if (this->get_stride().at(0) != 1) { - GKO_NOT_IMPLEMENTED; - } - auto temp = BatchDiagonal::create( - exec, - batch_dim<2>{batch_size.get_num_batch_entries(), - dim<2>{batch_size.at(0)[0]}}, - std::move(this->values_)); - *result = std::move(*temp); - // set the size of this to 0 - this->set_size(batch_dim<2>()); -} - - -namespace { - - template inline void read_impl(MatrixType* mtx, const std::vector& data) { @@ -362,26 +188,20 @@ inline void read_impl(MatrixType* mtx, const std::vector& data) } -} // namespace - - template -void BatchDense::read(const std::vector& data) +void BatchVector::read(const std::vector& data) { read_impl(this, data); } template -void BatchDense::read(const std::vector& data) +void BatchVector::read(const std::vector& data) { read_impl(this, data); } -namespace { - - template inline void write_impl(const MatrixType* mtx, std::vector& data) { @@ -410,57 +230,22 @@ inline void write_impl(const MatrixType* mtx, std::vector& data) } -} // namespace - - template -void BatchDense::write(std::vector& data) const +void BatchVector::write(std::vector& data) const { write_impl(this, data); } template -void BatchDense::write(std::vector& data) const +void BatchVector::write(std::vector& data) const { write_impl(this, data); } -template -std::unique_ptr BatchDense::transpose() const -{ - auto exec = this->get_executor(); - auto trans_cpy = BatchDense::create(exec, gko::transpose(this->get_size())); - - exec->run(batch_dense::make_transpose(this, trans_cpy.get())); - - return std::move(trans_cpy); -} - - -template -std::unique_ptr BatchDense::conj_transpose() const -{ - auto exec = this->get_executor(); - auto trans_cpy = BatchDense::create(exec, gko::transpose(this->get_size())); - - exec->run(batch_dense::make_conj_transpose(this, trans_cpy.get())); - return std::move(trans_cpy); -} - - -template -void BatchDense::add_scaled_identity_impl(const BatchLinOp* const a, - const BatchLinOp* const b) -{ - 
this->get_executor()->run(batch_dense::make_add_scaled_identity( - as>(a), as>(b), this)); -} - - -#define GKO_DECLARE_BATCH_DENSE_MATRIX(_type) class BatchDense<_type> -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_MATRIX); +#define GKO_DECLARE_BATCH_VECTOR_MATRIX(_type) class BatchVector<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_MATRIX); } // namespace matrix diff --git a/core/matrix/batch_vector_kernels.hpp b/core/matrix/batch_vector_kernels.hpp index 91dd3e6f5b7..6ddfc9e2676 100644 --- a/core/matrix/batch_vector_kernels.hpp +++ b/core/matrix/batch_vector_kernels.hpp @@ -30,11 +30,11 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ -#define GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ +#ifndef GKO_CORE_MATRIX_BATCH_VECTOR_KERNELS_HPP_ +#define GKO_CORE_MATRIX_BATCH_VECTOR_KERNELS_HPP_ -#include +#include #include @@ -46,232 +46,49 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(_type) \ - void simple_apply(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* a, \ - const matrix::BatchDense<_type>* b, \ - matrix::BatchDense<_type>* c) - -#define GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL(_type) \ - void apply(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* alpha, \ - const matrix::BatchDense<_type>* a, \ - const matrix::BatchDense<_type>* b, \ - const matrix::BatchDense<_type>* beta, \ - matrix::BatchDense<_type>* c) - -#define GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL(_type) \ +#define GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL(_type) \ void scale(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* alpha, \ - matrix::BatchDense<_type>* x) + const matrix::BatchVector<_type>* alpha, \ + matrix::BatchVector<_type>* x) -#define GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL(_type) \ +#define GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL(_type) \ void add_scaled(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* alpha, \ - const matrix::BatchDense<_type>* x, \ - matrix::BatchDense<_type>* y) - -#define GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL(_type) \ - void add_scale(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* alpha, \ - const matrix::BatchDense<_type>* x, \ - const matrix::BatchDense<_type>* beta, \ - matrix::BatchDense<_type>* y) - -#define GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL(_type) \ - void convergence_add_scaled(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* alpha, \ - const matrix::BatchDense<_type>* x, \ - matrix::BatchDense<_type>* y, \ - const uint32& converged) - -#define GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL(_type) \ - void add_scaled_diag(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* alpha, \ - const matrix::Diagonal<_type>* x, \ - matrix::BatchDense<_type>* y) + const matrix::BatchVector<_type>* alpha, \ + const matrix::BatchVector<_type>* x, \ + matrix::BatchVector<_type>* y) -#define GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL(_type) \ +#define GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL(_type) \ void compute_dot(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* x, \ - const matrix::BatchDense<_type>* y, \ - matrix::BatchDense<_type>* result) + const matrix::BatchVector<_type>* x, \ + const matrix::BatchVector<_type>* y, \ + matrix::BatchVector<_type>* result) - -#define 
GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL(_type) \ - void convergence_compute_dot(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* x, \ - const matrix::BatchDense<_type>* y, \ - matrix::BatchDense<_type>* result, \ - const uint32& converged) - -#define GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL(_type) \ +#define GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL(_type) \ void compute_norm2(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* x, \ - matrix::BatchDense>* result) - -#define GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL(_type) \ - void convergence_compute_norm2( \ - std::shared_ptr exec, \ - const matrix::BatchDense<_type>* x, \ - matrix::BatchDense>* result, \ - const uint32& converged) + const matrix::BatchVector<_type>* x, \ + matrix::BatchVector>* result) - -#define GKO_DECLARE_BATCH_DENSE_COPY_KERNEL(_type) \ +#define GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL(_type) \ void copy(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* x, \ - matrix::BatchDense<_type>* result) - -#define GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL(_type) \ - void convergence_copy(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* x, \ - matrix::BatchDense<_type>* result, \ - const uint32& converged) - -#define GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL(_type, _prec) \ - void convert_to_batch_csr(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* source, \ - matrix::BatchCsr<_type, _prec>* other) - -#define GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL(_type) \ - void count_nonzeros(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* source, \ - size_type* result) - -#define GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(_type) \ - void calculate_max_nnz_per_row( \ - std::shared_ptr exec, \ - const matrix::BatchDense<_type>* source, size_type* result) - -#define GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL(_type) \ - void calculate_nonzeros_per_row( \ - std::shared_ptr exec, \ - const matrix::BatchDense<_type>* source, array* result) - -#define GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL(_type) \ - void calculate_total_cols( \ - std::shared_ptr exec, \ - const matrix::BatchDense<_type>* source, size_type* result, \ - const size_type* stride_factor, const size_type* slice_size) - -#define GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL(_type) \ - void transpose(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* orig, \ - matrix::BatchDense<_type>* trans) - -#define GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL(_type) \ - void conj_transpose(std::shared_ptr exec, \ - const matrix::BatchDense<_type>* orig, \ - matrix::BatchDense<_type>* trans) - -#define GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL(ValueType) \ - void batch_scale(std::shared_ptr exec, \ - const matrix::BatchDiagonal* left_scale, \ - const matrix::BatchDiagonal* right_scale, \ - matrix::BatchDense* vec_to_scale) - -#define GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL(ValueType) \ - void add_scaled_identity(std::shared_ptr exec, \ - const matrix::BatchDense* a, \ - const matrix::BatchDense* b, \ - matrix::BatchDense* mtx) - - -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL(ValueType); \ - template \ - 
GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_COPY_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL(ValueType) - - -namespace omp { -namespace batch_dense { - -GKO_DECLARE_ALL_AS_TEMPLATES; - -} // namespace batch_dense -} // namespace omp - - -namespace cuda { -namespace batch_dense { - -GKO_DECLARE_ALL_AS_TEMPLATES; - -} // namespace batch_dense -} // namespace cuda - - -namespace reference { -namespace batch_dense { - -GKO_DECLARE_ALL_AS_TEMPLATES; - -} // namespace batch_dense -} // namespace reference - - -namespace hip { -namespace batch_dense { - -GKO_DECLARE_ALL_AS_TEMPLATES; - -} // namespace batch_dense -} // namespace hip + const matrix::BatchVector<_type>* x, \ + matrix::BatchVector<_type>* result) -namespace dpcpp { -namespace batch_dense { +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL(ValueType) -GKO_DECLARE_ALL_AS_TEMPLATES; -} // namespace batch_dense -} // namespace dpcpp +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_vector, + GKO_DECLARE_ALL_AS_TEMPLATES); #undef GKO_DECLARE_ALL_AS_TEMPLATES @@ -281,4 +98,4 @@ GKO_DECLARE_ALL_AS_TEMPLATES; } // namespace gko -#endif // GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ +#endif // GKO_CORE_MATRIX_BATCH_VECTOR_KERNELS_HPP_ diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 433361a054f..fbfe5f95e3f 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_test(batch_vector) ginkgo_create_test(coo) ginkgo_create_test(coo_builder) ginkgo_create_test(csr) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_vector.cpp similarity index 84% rename from core/test/matrix/batch_dense.cpp rename to core/test/matrix/batch_vector.cpp index 7db7469baf6..4735d5eead2 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_vector.cpp @@ -30,7 +30,7 @@ THEORY OF 
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include #include @@ -44,18 +44,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/test/utils.hpp" -namespace { - - template -class BatchDense : public ::testing::Test { +class BatchVector : public ::testing::Test { protected: using value_type = T; using DenseMtx = gko::matrix::Dense; using size_type = gko::size_type; - BatchDense() + BatchVector() : exec(gko::ReferenceExecutor::create()), - mtx(gko::batch_initialize>( + mtx(gko::batch_initialize>( std::vector{4, 3}, {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, @@ -64,7 +61,7 @@ class BatchDense : public ::testing::Test { static void assert_equal_to_original_mtx( - gko::matrix::BatchDense* m) + gko::matrix::BatchVector* m) { ASSERT_EQ(m->get_num_batch_entries(), 2); ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); @@ -88,37 +85,37 @@ class BatchDense : public ::testing::Test { ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); } - static void assert_empty(gko::matrix::BatchDense* m) + static void assert_empty(gko::matrix::BatchVector* m) { ASSERT_EQ(m->get_num_batch_entries(), 0); ASSERT_EQ(m->get_num_stored_elements(), 0); } std::shared_ptr exec; - std::unique_ptr> mtx; + std::unique_ptr> mtx; }; -TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); +TYPED_TEST_SUITE(BatchVector, gko::test::ValueTypes); -TYPED_TEST(BatchDense, CanBeEmpty) +TYPED_TEST(BatchVector, CanBeEmpty) { - auto empty = gko::matrix::BatchDense::create(this->exec); + auto empty = gko::matrix::BatchVector::create(this->exec); this->assert_empty(empty.get()); } -TYPED_TEST(BatchDense, ReturnsNullValuesArrayWhenEmpty) +TYPED_TEST(BatchVector, ReturnsNullValuesArrayWhenEmpty) { - auto empty = gko::matrix::BatchDense::create(this->exec); + auto empty = gko::matrix::BatchVector::create(this->exec); ASSERT_EQ(empty->get_const_values(), nullptr); } -TYPED_TEST(BatchDense, CanBeConstructedWithSize) +TYPED_TEST(BatchVector, CanBeConstructedWithSize) { using size_type = gko::size_type; - auto m = gko::matrix::BatchDense::create( + auto m = gko::matrix::BatchVector::create( this->exec, std::vector>{gko::dim<2>{2, 4}, gko::dim<2>{2, 3}}); @@ -133,10 +130,10 @@ TYPED_TEST(BatchDense, CanBeConstructedWithSize) } -TYPED_TEST(BatchDense, CanBeConstructedWithSizeAndStride) +TYPED_TEST(BatchVector, CanBeConstructedWithSizeAndStride) { using size_type = gko::size_type; - auto m = gko::matrix::BatchDense::create( + auto m = gko::matrix::BatchVector::create( this->exec, std::vector>{gko::dim<2>{2, 3}}, std::vector{4}); @@ -146,7 +143,7 @@ TYPED_TEST(BatchDense, CanBeConstructedWithSizeAndStride) } -TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) +TYPED_TEST(BatchVector, CanBeConstructedFromExistingData) { using value_type = typename TestFixture::value_type; using size_type = gko::size_type; @@ -158,7 +155,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) 5.0, 6.0, -3.0}; // clang-format on - auto m = gko::matrix::BatchDense::create( + auto m = gko::matrix::BatchVector::create( this->exec, std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, gko::array::view(this->exec, 12, data), @@ -172,7 +169,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) } -TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) +TYPED_TEST(BatchVector, CanBeConstructedFromExistingConstData) { using value_type = 
typename TestFixture::value_type; using size_type = gko::size_type; @@ -184,7 +181,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) 5.0, 6.0, -3.0}; // clang-format on - auto m = gko::matrix::BatchDense::create_const( + auto m = gko::matrix::BatchVector::create_const( this->exec, std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, gko::array::const_view(this->exec, 12, data), @@ -198,7 +195,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) } -TYPED_TEST(BatchDense, CanBeConstructedFromBatchDenseMatrices) +TYPED_TEST(BatchVector, CanBeConstructedFromBatchVectorMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -208,19 +205,19 @@ TYPED_TEST(BatchDense, CanBeConstructedFromBatchDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::matrix::BatchDense::create( + auto m = gko::matrix::BatchVector::create( this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::matrix::BatchDense::create( + auto m_ref = gko::matrix::BatchVector::create( this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), mat1.get(), mat2.get()}); auto m2 = - gko::matrix::BatchDense::create(this->exec, 3, m.get()); + gko::matrix::BatchVector::create(this->exec, 3, m.get()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } -TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) +TYPED_TEST(BatchVector, CanBeConstructedFromDenseMatricesByDuplication) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -230,16 +227,16 @@ TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::matrix::BatchDense::create( + auto bat_m = gko::matrix::BatchVector::create( this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); auto m = - gko::matrix::BatchDense::create(this->exec, 3, mat1.get()); + gko::matrix::BatchVector::create(this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } -TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) +TYPED_TEST(BatchVector, CanBeConstructedFromDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -249,14 +246,14 @@ TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::matrix::BatchDense::create( + auto m = gko::matrix::BatchVector::create( this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); } -TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) +TYPED_TEST(BatchVector, CanBeUnbatchedIntoDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -274,16 +271,16 @@ TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) } -TYPED_TEST(BatchDense, KnowsItsSizeAndValues) +TYPED_TEST(BatchVector, KnowsItsSizeAndValues) { this->assert_equal_to_original_mtx(this->mtx.get()); } -TYPED_TEST(BatchDense, CanBeListConstructed) +TYPED_TEST(BatchVector, CanBeListConstructed) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch_initialize>( {{1.0, 2.0}, {1.0, 3.0}}, this->exec); ASSERT_EQ(m->get_num_batch_entries(), 2); @@ -297,10 +294,10 @@ TYPED_TEST(BatchDense, CanBeListConstructed) } 
-TYPED_TEST(BatchDense, CanBeListConstructedWithstride) +TYPED_TEST(BatchVector, CanBeListConstructedWithstride) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch_initialize>( std::vector{2}, {{1.0, 2.0}}, this->exec); ASSERT_EQ(m->get_num_batch_entries(), 1); ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); @@ -310,10 +307,10 @@ TYPED_TEST(BatchDense, CanBeListConstructedWithstride) } -TYPED_TEST(BatchDense, CanBeListConstructedByCopies) +TYPED_TEST(BatchVector, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch_initialize>( 2, I({1.0, 2.0}), this->exec); ASSERT_EQ(m->get_num_batch_entries(), 2); ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); @@ -326,11 +323,11 @@ TYPED_TEST(BatchDense, CanBeListConstructedByCopies) } -TYPED_TEST(BatchDense, CanBeDoubleListConstructed) +TYPED_TEST(BatchVector, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; using T = value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch_initialize>( {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, this->exec); @@ -355,11 +352,11 @@ TYPED_TEST(BatchDense, CanBeDoubleListConstructed) } -TYPED_TEST(BatchDense, CanBeDoubleListConstructedWithstride) +TYPED_TEST(BatchVector, CanBeDoubleListConstructedWithstride) { using value_type = typename TestFixture::value_type; using T = value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch_initialize>( {4, 3}, {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, @@ -385,9 +382,9 @@ TYPED_TEST(BatchDense, CanBeDoubleListConstructedWithstride) } -TYPED_TEST(BatchDense, CanBeCopied) +TYPED_TEST(BatchVector, CanBeCopied) { - auto mtx_copy = gko::matrix::BatchDense::create(this->exec); + auto mtx_copy = gko::matrix::BatchVector::create(this->exec); mtx_copy->copy_from(this->mtx.get()); this->assert_equal_to_original_mtx(this->mtx.get()); this->mtx->at(0, 0, 0) = 7; @@ -396,15 +393,15 @@ TYPED_TEST(BatchDense, CanBeCopied) } -TYPED_TEST(BatchDense, CanBeMoved) +TYPED_TEST(BatchVector, CanBeMoved) { - auto mtx_copy = gko::matrix::BatchDense::create(this->exec); + auto mtx_copy = gko::matrix::BatchVector::create(this->exec); mtx_copy->copy_from(std::move(this->mtx)); this->assert_equal_to_original_mtx(mtx_copy.get()); } -TYPED_TEST(BatchDense, CanBeCloned) +TYPED_TEST(BatchVector, CanBeCloned) { auto mtx_clone = this->mtx->clone(); this->assert_equal_to_original_mtx( @@ -412,17 +409,17 @@ TYPED_TEST(BatchDense, CanBeCloned) } -TYPED_TEST(BatchDense, CanBeCleared) +TYPED_TEST(BatchVector, CanBeCleared) { this->mtx->clear(); this->assert_empty(this->mtx.get()); } -TYPED_TEST(BatchDense, CanBeReadFromMatrixData) +TYPED_TEST(BatchVector, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; - auto m = gko::matrix::BatchDense::create(this->exec); + auto m = gko::matrix::BatchVector::create(this->exec); // clang-format off m->read({gko::matrix_data{{2, 3}, {{0, 0, 1.0}, @@ -456,7 +453,7 @@ TYPED_TEST(BatchDense, CanBeReadFromMatrixData) } -TYPED_TEST(BatchDense, GeneratesCorrectMatrixData) +TYPED_TEST(BatchVector, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; using tpl = typename gko::matrix_data::nonzero_type; @@ -483,10 +480,10 @@ TYPED_TEST(BatchDense, GeneratesCorrectMatrixData) } -TYPED_TEST(BatchDense, 
CanBeReadFromMatrixAssemblyData) +TYPED_TEST(BatchVector, CanBeReadFromMatrixAssemblyData) { using value_type = typename TestFixture::value_type; - auto m = gko::matrix::BatchDense::create(this->exec); + auto m = gko::matrix::BatchVector::create(this->exec); gko::matrix_assembly_data data1(gko::dim<2>{2, 3}); data1.set_value(0, 0, 1.0); data1.set_value(0, 1, 3.0); @@ -515,6 +512,3 @@ TYPED_TEST(BatchDense, CanBeReadFromMatrixAssemblyData) EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); EXPECT_EQ(m->at(1, 1, 0), value_type{5.0}); } - - -} // namespace diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp new file mode 100644 index 00000000000..104286f66b9 --- /dev/null +++ b/cuda/matrix/batch_struct.hpp @@ -0,0 +1,118 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ + + +#include "core/matrix/batch_struct.hpp" + + +#include +#include + + +#include "cuda/base/config.hpp" +#include "cuda/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the requried CUDA scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices. + */ +template +inline gko::batch_vector::UniformBatch> +get_batch_struct(const matrix::BatchVector* const op) +{ + return { + as_cuda_type(op->get_const_values()), + op->get_num_batch_entries(), + op->get_stride().at(0), + static_cast(op->get_size().at(0)[0]), + static_cast(op->get_size().at(0)[1]), + static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; +} + +/** + * Generates a uniform batch struct from a batch of dense matrices. 
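+ *
+ * An illustrative usage sketch, mirroring the kernel launches in
+ * cuda/matrix/batch_vector_kernels.cu: the struct is built on the host and
+ * then passed by value to a device kernel, e.g.
+ *
+ *     const auto alpha_ub = get_batch_struct(alpha);
+ *     const auto x_ub = get_batch_struct(x);
+ *     scale<<<num_blocks, default_block_size>>>(alpha_ub, x_ub);
+ *
+ * where scale, num_blocks and default_block_size are the kernel and launch
+ * parameters defined in that file (the launch configuration shown here is
+ * only a sketch).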
+ */ +template +inline gko::batch_vector::UniformBatch> get_batch_struct( + matrix::BatchVector* const op) +{ + return { + as_cuda_type(op->get_values()), + op->get_num_batch_entries(), + op->get_stride().at(0), + static_cast(op->get_size().at(0)[0]), + static_cast(op->get_size().at(0)[1]), + static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; +} + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices + * that may be null. + */ +template +inline gko::batch_vector::UniformBatch> +maybe_null_batch_struct(const matrix::BatchVector* const op) +{ + if (op) { + return {as_cuda_type(op->get_const_values()), + op->get_num_batch_entries(), op->get_stride().at(0), + static_cast(op->get_size().at(0)[0]), + static_cast(op->get_size().at(0)[1])}; + } else { + return {nullptr, 0, 0, 0, 0}; + } +} + + +} // namespace cuda +} // namespace kernels +} // namespace gko +#endif // GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ diff --git a/cuda/matrix/batch_vector_kernels.cu b/cuda/matrix/batch_vector_kernels.cu index af67fa1597a..9ceca9e2b3a 100644 --- a/cuda/matrix/batch_vector_kernels.cu +++ b/cuda/matrix/batch_vector_kernels.cu @@ -30,12 +30,11 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_vector_kernels.hpp" #include #include -#include #include "core/matrix/batch_struct.hpp" @@ -53,69 +52,24 @@ namespace gko { namespace kernels { namespace cuda { /** - * @brief The BatchDense matrix format namespace. + * @brief The BatchVector matrix format namespace. * - * @ingroup batch_dense + * @ingroup batch_vector */ -namespace batch_dense { +namespace batch_vector { constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; -#include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_vector_kernels.hpp.inc" -template -void simple_apply(std::shared_ptr exec, - const matrix::BatchDense* a, - const matrix::BatchDense* b, - matrix::BatchDense* c) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto a_ub = get_batch_struct(a); - const auto b_ub = get_batch_struct(b); - const auto c_ub = get_batch_struct(c); - if (b_ub.num_rhs > 1) { - GKO_NOT_IMPLEMENTED; - } - mv<<>>(a_ub, b_ub, c_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); - - -template -void apply(std::shared_ptr exec, - const matrix::BatchDense* alpha, - const matrix::BatchDense* a, - const matrix::BatchDense* b, - const matrix::BatchDense* beta, - matrix::BatchDense* c) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto a_ub = get_batch_struct(a); - const auto b_ub = get_batch_struct(b); - const auto c_ub = get_batch_struct(c); - const auto alpha_ub = get_batch_struct(alpha); - const auto beta_ub = get_batch_struct(beta); - if (b_ub.num_rhs > 1) { - GKO_NOT_IMPLEMENTED; - } - advanced_mv<<>>(alpha_ub, a_ub, b_ub, - beta_ub, c_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL); - - template void scale(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - matrix::BatchDense* const x) + const matrix::BatchVector* const alpha, + matrix::BatchVector* const x) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto alpha_ub = get_batch_struct(alpha); @@ 
-123,14 +77,14 @@ void scale(std::shared_ptr exec, scale<<>>(alpha_ub, x_ub); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL); template void add_scaled(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const x, - matrix::BatchDense* const y) + const matrix::BatchVector* const alpha, + const matrix::BatchVector* const x, + matrix::BatchVector* const y) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const size_type nrhs = x->get_size().at(0)[1]; @@ -148,55 +102,14 @@ void add_scaled(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL); - - -template -void add_scale(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const x, - const matrix::BatchDense* const beta, - matrix::BatchDense* const y) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const size_type nrhs = x->get_size().at(0)[1]; - const auto alpha_ub = get_batch_struct(alpha); - const auto beta_ub = get_batch_struct(beta); - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - add_scale<<>>(alpha_ub, x_ub, beta_ub, - y_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL); - - -template -void convergence_add_scaled(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const x, - matrix::BatchDense* const y, - const uint32& converged) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL); - - -template -void add_scaled_diag(std::shared_ptr exec, - const matrix::BatchDense* alpha, - const matrix::Diagonal* x, - matrix::BatchDense* y) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::BatchDense* x, - const matrix::BatchDense* y, - matrix::BatchDense* result) + const matrix::BatchVector* x, + const matrix::BatchVector* y, + matrix::BatchVector* result) { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_size().at()[1]; @@ -215,26 +128,14 @@ void compute_dot(std::shared_ptr exec, } } - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL); - - -template -void convergence_compute_dot(std::shared_ptr exec, - const matrix::BatchDense* x, - const matrix::BatchDense* y, - matrix::BatchDense* result, - const uint32& converged) GKO_NOT_IMPLEMENTED; - - GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL); + GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::BatchDense* const x, - matrix::BatchDense>* const result) + const matrix::BatchVector* const x, + matrix::BatchVector>* const result) { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_size().at()[1]; @@ -251,112 +152,13 @@ void compute_norm2(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL); - - -template -void convergence_compute_norm2( - std::shared_ptr exec, - const matrix::BatchDense* const x, - matrix::BatchDense>* const result, - const uint32& converged) GKO_NOT_IMPLEMENTED; - 
-GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL); - - -template -void convert_to_batch_csr(std::shared_ptr exec, - const matrix::BatchDense* source, - matrix::BatchCsr* other) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL); - - -template -void count_nonzeros(std::shared_ptr exec, - const matrix::BatchDense* source, - size_type* result) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL); - - -template -void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::BatchDense* source, - size_type* result) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); - - -template -void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::BatchDense* source, - array* result) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL); - - -template -void calculate_total_cols(std::shared_ptr exec, - const matrix::BatchDense* source, - size_type* result, const size_type* stride_factor, - const size_type* slice_size) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL); - - -template -void transpose(std::shared_ptr exec, - const matrix::BatchDense* const orig, - matrix::BatchDense* const trans) -{ - using cu_val_type = cuda_type; - const size_type nbatch = orig->get_num_batch_entries(); - const size_type orig_stride = orig->get_stride().at(); - const size_type trans_stride = trans->get_stride().at(); - const int nrows = orig->get_size().at()[0]; - const int ncols = orig->get_size().at()[1]; - transpose<<>>( - nrows, ncols, orig_stride, as_cuda_type(orig->get_const_values()), - trans_stride, as_cuda_type(trans->get_values()), - [] __device__(cu_val_type x) { return x; }); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL); - - -template -void conj_transpose(std::shared_ptr exec, - const matrix::BatchDense* orig, - matrix::BatchDense* trans) -{ - using cu_val_type = cuda_type; - const size_type nbatch = orig->get_num_batch_entries(); - const size_type orig_stride = orig->get_stride().at(); - const size_type trans_stride = trans->get_stride().at(); - const int nrows = orig->get_size().at()[0]; - const int ncols = orig->get_size().at()[1]; - transpose<<>>( - nrows, ncols, orig_stride, as_cuda_type(orig->get_const_values()), - trans_stride, as_cuda_type(trans->get_values()), - [] __device__(cu_val_type x) { return conj(x); }); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL); + GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL); template void copy(std::shared_ptr exec, - const matrix::BatchDense* x, - matrix::BatchDense* result) + const matrix::BatchVector* x, + matrix::BatchVector* result) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto result_ub = get_batch_struct(result); @@ -364,71 +166,10 @@ void copy(std::shared_ptr exec, copy<<>>(x_ub, result_ub); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COPY_KERNEL); - - -template -void convergence_copy(std::shared_ptr exec, - const matrix::BatchDense* x, - matrix::BatchDense* result, - const uint32& converged) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL); - - -template -void 
batch_scale(std::shared_ptr exec, - const matrix::BatchDiagonal* const left_scale, - const matrix::BatchDiagonal* const rght_scale, - matrix::BatchDense* const vec_to_scale) -{ - if (!left_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; - if (!rght_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; - if (!vec_to_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; - - const auto stride = vec_to_scale->get_stride().at(); - const auto nrows = static_cast(vec_to_scale->get_size().at()[0]); - const auto nrhs = static_cast(vec_to_scale->get_size().at()[1]); - const auto nbatch = vec_to_scale->get_num_batch_entries(); - - const int num_blocks = vec_to_scale->get_num_batch_entries(); - uniform_batch_scale<<>>( - nrows, stride, nrhs, nbatch, - as_cuda_type(left_scale->get_const_values()), - as_cuda_type(rght_scale->get_const_values()), - as_cuda_type(vec_to_scale->get_values())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL); - - -template -void add_scaled_identity(std::shared_ptr exec, - const matrix::BatchDense* const a, - const matrix::BatchDense* const b, - matrix::BatchDense* const mtx) -{ - if (!mtx->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; - const auto num_blocks = mtx->get_num_batch_entries(); - const auto nrows = static_cast(mtx->get_size().at(0)[0]); - const auto ncols = static_cast(mtx->get_size().at(0)[1]); - const auto stride = mtx->get_stride().at(0); - const auto values = mtx->get_values(); - const auto alpha = a->get_const_values(); - const auto a_stride = a->get_stride().at(0); - const auto b_stride = b->get_stride().at(0); - const auto beta = b->get_const_values(); - add_scaled_identity<<>>( - num_blocks, nrows, ncols, stride, as_cuda_type(values), a_stride, - as_cuda_type(alpha), b_stride, as_cuda_type(beta)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL); -} // namespace batch_dense +} // namespace batch_vector } // namespace cuda } // namespace kernels } // namespace gko diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp new file mode 100644 index 00000000000..e2648ba4a25 --- /dev/null +++ b/hip/matrix/batch_struct.hip.hpp @@ -0,0 +1,120 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ +#define GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ + + +#include "core/matrix/batch_struct.hpp" + + +#include +#include + + +#include "hip/base/config.hip.hpp" +#include "hip/base/types.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the requried Hip scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices. + */ +template +inline gko::batch_vector::UniformBatch> +get_batch_struct(const matrix::BatchVector* const op) +{ + return { + as_hip_type(op->get_const_values()), + op->get_num_batch_entries(), + op->get_stride().at(0), + static_cast(op->get_size().at(0)[0]), + static_cast(op->get_size().at(0)[1]), + static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; +} + +/** + * Generates a uniform batch struct from a batch of dense matrices. + */ +template +inline gko::batch_vector::UniformBatch> get_batch_struct( + matrix::BatchVector* const op) +{ + return { + as_hip_type(op->get_values()), + op->get_num_batch_entries(), + op->get_stride().at(0), + static_cast(op->get_size().at(0)[0]), + static_cast(op->get_size().at(0)[1]), + static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; +} + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices + * that may be null. + */ +template +inline gko::batch_vector::UniformBatch> +maybe_null_batch_struct(const matrix::BatchVector* const op) +{ + if (op) { + return {as_hip_type(op->get_const_values()), + op->get_num_batch_entries(), op->get_stride().at(0), + static_cast(op->get_size().at(0)[0]), + static_cast(op->get_size().at(0)[1])}; + } else { + return {nullptr, 0, 0, 0, 0}; + } +} + + +} // namespace hip +} // namespace kernels +} // namespace gko + + +#endif // GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ diff --git a/hip/matrix/batch_vector_kernels.hip.cpp b/hip/matrix/batch_vector_kernels.hip.cpp index 32665e31191..97bbaf50440 100644 --- a/hip/matrix/batch_vector_kernels.hip.cpp +++ b/hip/matrix/batch_vector_kernels.hip.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_vector_kernels.hpp" #include @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include #include "core/matrix/batch_struct.hpp" @@ -56,70 +55,24 @@ namespace gko { namespace kernels { namespace hip { /** - * @brief The BatchDense matrix format namespace. 
+ * @brief The BatchVector matrix format namespace. * - * @ingroup batch_dense + * @ingroup batch_vector */ -namespace batch_dense { +namespace batch_vector { constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; -#include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_vector_kernels.hpp.inc" -template -void simple_apply(std::shared_ptr exec, - const matrix::BatchDense* a, - const matrix::BatchDense* b, - matrix::BatchDense* c) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto a_ub = get_batch_struct(a); - const auto b_ub = get_batch_struct(b); - const auto c_ub = get_batch_struct(c); - if (b_ub.num_rhs > 1) { - GKO_NOT_IMPLEMENTED; - } - hipLaunchKernelGGL(mv, num_blocks, default_block_size, 0, 0, a_ub, b_ub, - c_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); - - -template -void apply(std::shared_ptr exec, - const matrix::BatchDense* alpha, - const matrix::BatchDense* a, - const matrix::BatchDense* b, - const matrix::BatchDense* beta, - matrix::BatchDense* c) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto a_ub = get_batch_struct(a); - const auto b_ub = get_batch_struct(b); - const auto c_ub = get_batch_struct(c); - const auto alpha_ub = get_batch_struct(alpha); - const auto beta_ub = get_batch_struct(beta); - if (b_ub.num_rhs > 1) { - GKO_NOT_IMPLEMENTED; - } - hipLaunchKernelGGL(advanced_mv, num_blocks, default_block_size, 0, 0, - alpha_ub, a_ub, b_ub, beta_ub, c_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL); - - template void scale(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - matrix::BatchDense* const x) + const matrix::BatchVector* const alpha, + matrix::BatchVector* const x) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto alpha_ub = get_batch_struct(alpha); @@ -128,14 +81,14 @@ void scale(std::shared_ptr exec, alpha_ub, x_ub); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL); template void add_scaled(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const x, - matrix::BatchDense* const y) + const matrix::BatchVector* const alpha, + const matrix::BatchVector* const x, + matrix::BatchVector* const y) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const size_type nrhs = x->get_size().at(0)[1]; @@ -156,55 +109,14 @@ void add_scaled(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL); - - -template -void add_scale(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const x, - const matrix::BatchDense* const beta, - matrix::BatchDense* const y) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const size_type nrhs = x->get_size().at(0)[1]; - const auto alpha_ub = get_batch_struct(alpha); - const auto beta_ub = get_batch_struct(beta); - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - hipLaunchKernelGGL(add_scale, num_blocks, default_block_size, 0, 0, - alpha_ub, x_ub, beta_ub, y_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL); - - -template -void convergence_add_scaled(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const 
matrix::BatchDense* const x, - matrix::BatchDense* const y, - const uint32& converged) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL); - - -template -void add_scaled_diag(std::shared_ptr exec, - const matrix::BatchDense* alpha, - const matrix::Diagonal* x, - matrix::BatchDense* y) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::BatchDense* x, - const matrix::BatchDense* y, - matrix::BatchDense* result) + const matrix::BatchVector* x, + const matrix::BatchVector* y, + matrix::BatchVector* result) { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_size().at()[1]; @@ -225,25 +137,14 @@ void compute_dot(std::shared_ptr exec, } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL); - - -template -void convergence_compute_dot(std::shared_ptr exec, - const matrix::BatchDense* x, - const matrix::BatchDense* y, - matrix::BatchDense* result, - const uint32& converged) GKO_NOT_IMPLEMENTED; - - GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL); + GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::BatchDense* const x, - matrix::BatchDense>* const result) + const matrix::BatchVector* const x, + matrix::BatchVector>* const result) { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_size().at()[1]; @@ -262,114 +163,13 @@ void compute_norm2(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL); - - -template -void convergence_compute_norm2( - std::shared_ptr exec, - const matrix::BatchDense* const x, - matrix::BatchDense>* const result, - const uint32& converged) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL); - - -template -void convert_to_batch_csr(std::shared_ptr exec, - const matrix::BatchDense* source, - matrix::BatchCsr* other) - GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL); - - -template -void count_nonzeros(std::shared_ptr exec, - const matrix::BatchDense* source, - size_type* result) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL); - - -template -void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::BatchDense* source, - size_type* result) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); - - -template -void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::BatchDense* source, - array* result) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL); - - -template -void calculate_total_cols(std::shared_ptr exec, - const matrix::BatchDense* source, - size_type* result, const size_type* stride_factor, - const size_type* slice_size) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL); - - -template -void transpose(std::shared_ptr exec, - const matrix::BatchDense* const orig, - matrix::BatchDense* const trans) -{ - using hip_val_type = 
hip_type; - const size_type nbatch = orig->get_num_batch_entries(); - const size_type orig_stride = orig->get_stride().at(); - const size_type trans_stride = trans->get_stride().at(); - const int nrows = orig->get_size().at()[0]; - const int ncols = orig->get_size().at()[1]; - hipLaunchKernelGGL(transpose, dim3(nbatch), dim3(default_block_size), 0, 0, - nrows, ncols, orig_stride, - as_hip_type(orig->get_const_values()), trans_stride, - as_hip_type(trans->get_values()), - [] __device__(hip_val_type x) { return x; }); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL); - - -template -void conj_transpose(std::shared_ptr exec, - const matrix::BatchDense* orig, - matrix::BatchDense* trans) -{ - using hip_val_type = hip_type; - const size_type nbatch = orig->get_num_batch_entries(); - const size_type orig_stride = orig->get_stride().at(); - const size_type trans_stride = trans->get_stride().at(); - const int nrows = orig->get_size().at()[0]; - const int ncols = orig->get_size().at()[1]; - hipLaunchKernelGGL(transpose, dim3(nbatch), dim3(default_block_size), 0, 0, - nrows, ncols, orig_stride, - as_hip_type(orig->get_const_values()), trans_stride, - as_hip_type(trans->get_values()), - [] __device__(hip_val_type x) { return conj(x); }); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL); + GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL); template void copy(std::shared_ptr exec, - const matrix::BatchDense* x, - matrix::BatchDense* result) + const matrix::BatchVector* x, + matrix::BatchVector* result) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto result_ub = get_batch_struct(result); @@ -378,72 +178,10 @@ void copy(std::shared_ptr exec, x_ub, result_ub); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COPY_KERNEL); - - -template -void convergence_copy(std::shared_ptr exec, - const matrix::BatchDense* x, - matrix::BatchDense* result, - const uint32& converged) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL); - - -template -void batch_scale(std::shared_ptr exec, - const matrix::BatchDiagonal* const left_scale, - const matrix::BatchDiagonal* const rght_scale, - matrix::BatchDense* const vec_to_scale) -{ - if (!left_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; - if (!rght_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; - if (!vec_to_scale->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; - - const auto stride = vec_to_scale->get_stride().at(); - const auto nrows = static_cast(vec_to_scale->get_size().at()[0]); - const auto nrhs = static_cast(vec_to_scale->get_size().at()[1]); - const auto nbatch = vec_to_scale->get_num_batch_entries(); - - const int num_blocks = vec_to_scale->get_num_batch_entries(); - hipLaunchKernelGGL(uniform_batch_scale, dim3(num_blocks), - dim3(default_block_size), 0, 0, nrows, stride, nrhs, - nbatch, as_hip_type(left_scale->get_const_values()), - as_hip_type(rght_scale->get_const_values()), - as_hip_type(vec_to_scale->get_values())); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL); - - -template -void add_scaled_identity(std::shared_ptr exec, - const matrix::BatchDense* const a, - const matrix::BatchDense* const b, - matrix::BatchDense* const mtx) -{ - if (!mtx->get_size().stores_equal_sizes()) GKO_NOT_IMPLEMENTED; - const auto num_blocks = mtx->get_num_batch_entries(); - const auto nrows = 
static_cast(mtx->get_size().at(0)[0]); - const auto ncols = static_cast(mtx->get_size().at(0)[1]); - const auto stride = mtx->get_stride().at(0); - const auto values = mtx->get_values(); - const auto alpha = a->get_const_values(); - const auto a_stride = a->get_stride().at(0); - const auto b_stride = b->get_stride().at(0); - const auto beta = b->get_const_values(); - hipLaunchKernelGGL(add_scaled_identity, num_blocks, default_block_size, 0, - 0, num_blocks, nrows, ncols, stride, as_hip_type(values), - a_stride, as_hip_type(alpha), b_stride, - as_hip_type(beta)); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL); -} // namespace batch_dense +} // namespace batch_vector } // namespace hip } // namespace kernels } // namespace gko diff --git a/include/ginkgo/core/matrix/batch_vector.hpp b/include/ginkgo/core/matrix/batch_vector.hpp index f4061114052..aee16bbc27b 100644 --- a/include/ginkgo/core/matrix/batch_vector.hpp +++ b/include/ginkgo/core/matrix/batch_vector.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ -#define GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ +#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_VECTOR_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_BATCH_VECTOR_HPP_ #include @@ -39,7 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include #include #include @@ -52,17 +51,9 @@ namespace gko { namespace matrix { -template -class BatchDiagonal; - - -template -class BatchCsr; - - /** - * BatchDense is a batch matrix format which explicitly stores all values of the - * matrix in each of the batches. + * BatchVector is a batch matrix format which explicitly stores all values of + * the vector in each of the batches. * * The values in each of the batches are stored in row-major format (values * belonging to the same row appear consecutive in the memory). Optionally, rows @@ -72,51 +63,44 @@ class BatchCsr; * * @note While this format is not very useful for storing sparse matrices, it * is often suitable to store vectors, and sets of vectors. 
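 *
 * A minimal usage sketch (illustrative only; it assumes an executor handle
 * `exec` is already available and uses the batch_initialize helper declared
 * further down in this header):
 *
 *     auto x = gko::batch_initialize<gko::matrix::BatchVector<double>>(
 *         {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}, exec);
 *     // x now holds two batch entries, each a column vector of length 3;
 *     // x->at(1, 2) reads the last value of the second entry.
 *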
- * @ingroup batch_dense + * @ingroup batch_vector * @ingroup mat_formats * @ingroup BatchLinOp */ template -class BatchDense : public EnableBatchLinOp>, - public EnableCreateMethod>, - public ConvertibleTo>>, - public ConvertibleTo>, - public ConvertibleTo>, - public BatchReadableFromMatrixData, - public BatchReadableFromMatrixData, - public BatchWritableToMatrixData, - public BatchWritableToMatrixData, - public BatchTransposable, - public BatchScaledIdentityAddable { - friend class EnableCreateMethod; - friend class EnablePolymorphicObject; - friend class BatchDense>; +class BatchVector + : public EnableAbstractPolymorphicObject>, + public EnableCreateMethod>, + public ConvertibleTo>>, + public BatchReadableFromMatrixData, + public BatchReadableFromMatrixData, + public BatchWritableToMatrixData, + public BatchWritableToMatrixData { + friend class EnableCreateMethod; + friend class BatchVector>; public: - using EnableBatchLinOp::convert_to; - using EnableBatchLinOp::move_to; using BatchReadableFromMatrixData::read; using BatchReadableFromMatrixData::read; using value_type = ValueType; using index_type = int32; - using transposed_type = BatchDense; using unbatch_type = Dense; using mat_data = gko::matrix_data; using mat_data32 = gko::matrix_data; - using absolute_type = remove_complex; - using complex_type = to_complex; + using absolute_type = remove_complex; + using complex_type = to_complex; using row_major_range = gko::range>; /** - * Creates a BatchDense matrix with the configuration of another BatchDense - * matrix. + * Creates a BatchVector matrix with the configuration of another + * BatchVector matrix. * * @param other The other matrix whose configuration needs to copied. */ - static std::unique_ptr create_with_config_of( - const BatchDense* other) + static std::unique_ptr create_with_config_of( + const BatchVector* other) { // De-referencing `other` before calling the functions (instead of // using operator `->`) is currently required to be compatible with @@ -125,20 +109,12 @@ class BatchDense : public EnableBatchLinOp>, return (*other).create_with_same_config(); } - friend class BatchDense>; + friend class BatchVector>; void convert_to( - BatchDense>* result) const override; - - void move_to(BatchDense>* result) override; + BatchVector>* result) const override; - void convert_to(BatchCsr* result) const override; - - void move_to(BatchCsr* result) override; - - void convert_to(BatchDiagonal* result) const override; - - void move_to(BatchDiagonal* result) override; + void move_to(BatchVector>* result) override; void read(const std::vector& data) override; @@ -148,10 +124,6 @@ class BatchDense : public EnableBatchLinOp>, void write(std::vector& data) const override; - std::unique_ptr transpose() const override; - - std::unique_ptr conj_transpose() const override; - /** * Unbatches the batched dense and creates a std::vector of Dense matrices * @@ -174,14 +146,14 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Returns a pointer to the array of values of the matrix. + * Returns a pointer to the array of values of the vector. * * @return the pointer to the array of values */ value_type* get_values() noexcept { return values_.get_data(); } /** - * Returns a pointer to the array of values of the matrix. + * Returns a pointer to the array of values of the vector. 
* * @return the pointer to the array of values */ @@ -218,18 +190,11 @@ class BatchDense : public EnableBatchLinOp>, num_elems_per_batch_cumul_.get_const_data()[batch]; } - /** - * Returns the batch_stride of the matrix. - * - * @return the batch_stride of the matrix. - */ - const batch_stride& get_stride() const noexcept { return stride_; } - /** * Returns the number of elements explicitly stored in the batch matrix, * cumulative across all the batches. * - * @return the number of elements explicitly stored in the matrix, + * @return the number of elements explicitly stored in the vector, * cumulative across all the batches */ size_type get_num_stored_elements() const noexcept @@ -243,7 +208,7 @@ class BatchDense : public EnableBatchLinOp>, * * @param batch the batch index to be queried * - * @return the number of elements explicitly stored in the matrix + * @return the number of elements explicitly stored in the vector */ size_type get_num_stored_elements(size_type batch) const noexcept { @@ -259,7 +224,7 @@ class BatchDense : public EnableBatchLinOp>, * @param row the row of the requested element * @param col the column of the requested element * - * @note the method has to be called on the same Executor the matrix is + * @note the method has to be called on the same Executor the vector is * stored at (e.g. trying to call this method on a GPU matrix from * the OMP results in a runtime error) */ @@ -270,7 +235,7 @@ class BatchDense : public EnableBatchLinOp>, } /** - * @copydoc BatchDense::at(size_type, size_type, size_type) + * @copydoc BatchVector::at(size_type, size_type, size_type) */ value_type at(size_type batch, size_type row, size_type col) const noexcept { @@ -281,7 +246,7 @@ class BatchDense : public EnableBatchLinOp>, /** * Returns a single element for a particular batch entry. * - * Useful for iterating across all elements of the matrix. + * Useful for iterating across all elements of the vector. * However, it is less efficient than the two-parameter variant of this * method. * @@ -289,7 +254,7 @@ class BatchDense : public EnableBatchLinOp>, * @param idx a linear index of the requested element * (ignoring the stride) * - * @note the method has to be called on the same Executor the matrix is + * @note the method has to be called on the same Executor the vector is * stored at (e.g. trying to call this method on a GPU matrix from * the OMP results in a runtime error) */ @@ -299,7 +264,7 @@ class BatchDense : public EnableBatchLinOp>, } /** - * @copydoc BatchDense::at(size_type, size_type, size_type) + * @copydoc BatchVector::at(size_type, size_type, size_type) */ ValueType at(size_type batch, size_type idx) const noexcept { @@ -307,11 +272,11 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Scales the matrix with a scalar (aka: BLAS scal). + * Scales the vector with a scalar (aka: BLAS scal). * - * @param alpha If alpha is 1x1 BatchDense matrix, the entire matrix (all - * batches) is scaled by alpha. If it is a BatchDense row vector of values, - * then i-th column of the matrix is scaled with the i-th element of alpha + * @param alpha If alpha is 1x1 BatchVector matrix, the entire matrix (all + * batches) is scaled by alpha. If it is a BatchVector row vector of values, + * then i-th column of the vector is scaled with the i-th element of alpha * (the number of columns of alpha has to match the number of columns of the * matrix). */ @@ -322,12 +287,12 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Adds `b` scaled by `alpha` to the matrix (aka: BLAS axpy). 
+ * Adds `b` scaled by `alpha` to the vector (aka: BLAS axpy). * - * @param alpha If alpha is 1x1 BatchDense matrix, the entire matrix is - * scaled by alpha. If it is a BatchDense row vector of values, then i-th - * column of the matrix is scaled with the i-th element of alpha (the number - * of columns of alpha has to match the number of columns of the matrix). + * @param alpha If alpha is 1x1 BatchVector matrix, the entire matrix is + * scaled by alpha. If it is a BatchVector row vector of values, then i-th + * column of the vector is scaled with the i-th element of alpha (the number + * of columns of alpha has to match the number of columns of the vector). * @param b a matrix of the same dimension as this */ void add_scaled(const BatchLinOp* alpha, const BatchLinOp* b) @@ -338,11 +303,11 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Adds `a` scaled by `alpha` to the matrix scaled by `beta`: + * Adds `a` scaled by `alpha` to the vector scaled by `beta`: * this <- alpha * a + beta * this. * - * @param alpha If alpha is 1x1 BatchDense matrix, the entire matrix a is - * scaled by alpha. If it is a BatchDense row vector of + * @param alpha If alpha is 1x1 BatchVector matrix, the entire matrix a is + * scaled by alpha. If it is a BatchVector row vector of * values, then i-th column of a is scaled with the i-th * element of alpha (the number of columns of alpha has to * match the number of columns of a). @@ -355,11 +320,11 @@ class BatchDense : public EnableBatchLinOp>, /** * Computes the column-wise dot product of each matrix in this batch and its - * corresponding entry in `b`. If the matrix has complex value_type, then + * corresponding entry in `b`. If the vector has complex value_type, then * the conjugate of this is taken. * - * @param b a BatchDense matrix of same dimension as this - * @param result a BatchDense row vector, used to store the dot product + * @param b a BatchVector matrix of same dimension as this + * @param result a BatchVector row vector, used to store the dot product * (the number of column in the vector must match the number * of columns of this) */ @@ -373,7 +338,7 @@ class BatchDense : public EnableBatchLinOp>, /** * Computes the Euclidean (L^2) norm of each matrix in this batch. * - * @param result a BatchDense row vector, used to store the norm + * @param result a BatchVector row vector, used to store the norm * (the number of columns in the vector must match the number * of columns of this) */ @@ -386,22 +351,22 @@ class BatchDense : public EnableBatchLinOp>, /** * Creates a constant (immutable) batch dense matrix from a constant array. * - * @param exec the executor to create the matrix on - * @param size the dimensions of the matrix - * @param values the value array of the matrix - * @param stride the row-stride of the matrix + * @param exec the executor to create the vector on + * @param size the dimensions of the vector + * @param values the value array of the vector + * @param stride the row-stride of the vector * @returns A smart pointer to the constant matrix wrapping the input array - * (if it resides on the same executor as the matrix) or a copy of + * (if it resides on the same executor as the vector) or a copy of * the array on the correct executor. 
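 *
 * A minimal sketch of wrapping existing data (illustrative only; `exec`,
 * `num_entries`, `num_rows` and `data` are placeholders, and it assumes
 * gko::array<double>::const_view is available to build the constant view):
 *
 *     auto view = gko::array<double>::const_view(
 *         exec, num_entries * num_rows, data);
 *     auto x = gko::matrix::BatchVector<double>::create_const(
 *         exec, gko::batch_dim<2>(num_entries, gko::dim<2>(num_rows, 1)),
 *         std::move(view), gko::batch_stride(num_entries, 1));
 *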
*/ - static std::unique_ptr create_const( + static std::unique_ptr create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values, const batch_stride& strides) { // cast const-ness away, but return a const object afterwards, // so we can ensure that no modifications take place. - return std::unique_ptr(new BatchDense{ + return std::unique_ptr(new BatchVector{ exec, sizes, gko::detail::array_const_cast(std::move(values)), strides}); } @@ -489,30 +454,31 @@ class BatchDense : public EnableBatchLinOp>, protected: /** - * Creates an uninitialized BatchDense matrix of the specified size. + * Creates an uninitialized BatchVector matrix of the specified size. * - * @param exec Executor associated to the matrix - * @param size size of the matrix + * @param exec Executor associated to the vector + * @param size size of the vector */ - BatchDense(std::shared_ptr exec, - const batch_dim<2>& size = batch_dim<2>{}) - : BatchDense(std::move(exec), size, - size.get_num_batch_entries() > 0 ? extract_nth_dim(1, size) - : batch_stride{}) + BatchVector(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}) + : BatchVector(std::move(exec), size, + size.get_num_batch_entries() > 0 + ? extract_nth_dim(1, size) + : batch_stride{}) {} /** - * Creates an uninitialized BatchDense matrix of the specified size. + * Creates an uninitialized BatchVector matrix of the specified size. * - * @param exec Executor associated to the matrix + * @param exec Executor associated to the vector * @param size size of the batch matrices in a batch_dim object * @param stride stride of the rows (i.e. offset between the first * elements of two consecutive rows, expressed as the * number of matrix elements) */ - BatchDense(std::shared_ptr exec, const batch_dim<2>& size, - const batch_stride& stride) - : EnableBatchLinOp(exec, size), + BatchVector(std::shared_ptr exec, const batch_dim<2>& size, + const batch_stride& stride) + : EnableBatchLinOp(exec, size), values_(exec, compute_batch_mem(size, stride)), stride_(stride) { @@ -521,12 +487,12 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Creates a BatchDense matrix from an already allocated (and initialized) + * Creates a BatchVector matrix from an already allocated (and initialized) * array. * * @tparam ValuesArray type of array of values * - * @param exec Executor associated to the matrix + * @param exec Executor associated to the vector * @param size sizes of the batch matrices in a batch_dim object * @param values array of matrix values * @param strides stride of the rows (i.e. offset between the first @@ -535,12 +501,12 @@ class BatchDense : public EnableBatchLinOp>, * * @note If `values` is not an rvalue, not an array of ValueType, or is on * the wrong executor, an internal copy will be created, and the - * original array data will not be used in the matrix. + * original array data will not be used in the vector. 
*/ template - BatchDense(std::shared_ptr exec, const batch_dim<2>& size, - ValuesArray&& values, const batch_stride& stride) - : EnableBatchLinOp(exec, size), + BatchVector(std::shared_ptr exec, const batch_dim<2>& size, + ValuesArray&& values, const batch_stride& stride) + : EnableBatchLinOp(exec, size), values_{exec, std::forward(values)}, stride_{stride}, num_elems_per_batch_cumul_( @@ -557,14 +523,14 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Creates a BatchDense matrix from a vector of matrices + * Creates a BatchVector matrix from a vector of matrices * - * @param exec Executor associated to the matrix + * @param exec Executor associated to the vector * @param matrices The matrices that need to be batched. */ - BatchDense(std::shared_ptr exec, - const std::vector*>& matrices) - : EnableBatchLinOp(exec, get_sizes_from_mtxs(matrices)), + BatchVector(std::shared_ptr exec, + const std::vector*>& matrices) + : EnableBatchLinOp(exec, get_sizes_from_mtxs(matrices)), stride_{get_strides_from_mtxs(matrices)}, values_(exec, compute_batch_mem(this->get_size(), stride_)) { @@ -581,15 +547,16 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Creates a BatchDense matrix by duplicating BatchDense matrix + * Creates a BatchVector matrix by duplicating BatchVector matrix * - * @param exec Executor associated to the matrix + * @param exec Executor associated to the vector * @param num_duplications The number of times to duplicate - * @param input The matrix to be duplicated. + * @param input the vector to be duplicated. */ - BatchDense(std::shared_ptr exec, size_type num_duplications, - const BatchDense* input) - : EnableBatchLinOp( + BatchVector(std::shared_ptr exec, + size_type num_duplications, + const BatchVector* input) + : EnableBatchLinOp( exec, gko::batch_dim<2>( input->get_num_batch_entries() * num_duplications, input->get_size().at(0))), @@ -611,15 +578,15 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Creates a BatchDense matrix by duplicating Dense matrix + * Creates a BatchVector matrix by duplicating Dense matrix * - * @param exec Executor associated to the matrix + * @param exec Executor associated to the vector * @param num_duplications The number of times to duplicate - * @param input The matrix to be duplicated. + * @param input the vector to be duplicated. */ - BatchDense(std::shared_ptr exec, size_type num_duplications, - const Dense* input) - : EnableBatchLinOp( + BatchVector(std::shared_ptr exec, + size_type num_duplications, const Dense* input) + : EnableBatchLinOp( exec, gko::batch_dim<2>(num_duplications, input->get_size())), stride_{gko::batch_stride(num_duplications, input->get_stride())}, values_(exec, compute_batch_mem(this->get_size(), stride_)) @@ -637,30 +604,30 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Creates a BatchDense matrix with the same configuration as the callers + * Creates a BatchVector matrix with the same configuration as the callers * matrix. * - * @returns a BatchDense matrix with the same configuration as the caller. + * @returns a BatchVector matrix with the same configuration as the caller. 
*/ - virtual std::unique_ptr create_with_same_config() const + virtual std::unique_ptr create_with_same_config() const { - return BatchDense::create(this->get_executor(), this->get_size(), - this->get_stride()); + return BatchVector::create(this->get_executor(), this->get_size(), + this->get_stride()); } /** * @copydoc scale(const BatchLinOp *) * - * @note Other implementations of batch_dense should override this function - * instead of scale(const BatchLinOp *alpha). + * @note Other implementations of batch_vector should override this + * function instead of scale(const BatchLinOp *alpha). */ virtual void scale_impl(const BatchLinOp* alpha); /** * @copydoc add_scaled(const BatchLinOp *, const BatchLinOp *) * - * @note Other implementations of batch_dense should override this function - * instead of add_scale(const BatchLinOp *alpha, const BatchLinOp + * @note Other implementations of batch_vector should override this + * function instead of add_scale(const BatchLinOp *alpha, const BatchLinOp * *b). */ virtual void add_scaled_impl(const BatchLinOp* alpha, const BatchLinOp* b); @@ -668,8 +635,8 @@ class BatchDense : public EnableBatchLinOp>, /** * @copydoc compute_dot(const BatchLinOp *, BatchLinOp *) const * - * @note Other implementations of batch_dense should override this function - * instead of compute_dot(const BatchLinOp *b, BatchLinOp *result). + * @note Other implementations of batch_vector should override this + * function instead of compute_dot(const BatchLinOp *b, BatchLinOp *result). */ virtual void compute_dot_impl(const BatchLinOp* b, BatchLinOp* result) const; @@ -677,16 +644,11 @@ class BatchDense : public EnableBatchLinOp>, /** * @copydoc compute_norm2(BatchLinOp *) const * - * @note Other implementations of batch_dense should override this function - * instead of compute_norm2(BatchLinOp *result). + * @note Other implementations of batch_vector should override this + * function instead of compute_norm2(BatchLinOp *result). */ virtual void compute_norm2_impl(BatchLinOp* result) const; - void apply_impl(const BatchLinOp* b, BatchLinOp* x) const override; - - void apply_impl(const BatchLinOp* alpha, const BatchLinOp* b, - const BatchLinOp* beta, BatchLinOp* x) const override; - size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept { @@ -704,9 +666,6 @@ class BatchDense : public EnableBatchLinOp>, batch_stride stride_; array num_elems_per_batch_cumul_; array values_; - - void add_scaled_identity_impl(const BatchLinOp* a, - const BatchLinOp* b) override; }; @@ -717,7 +676,7 @@ class BatchDense : public EnableBatchLinOp>, * Creates and initializes a batch of column-vectors. * * This function first creates a temporary Dense matrix, fills it with passed in - * values, and then converts the matrix to the requested type. + * values, and then converts the vector to the requested type. * * @tparam Matrix matrix type to initialize * (Dense has to implement the ConvertibleTo interface) @@ -741,7 +700,7 @@ std::unique_ptr batch_initialize( vals, std::shared_ptr exec, TArgs&&... 
create_args) { - using batch_dense = matrix::BatchDense; + using batch_vector = matrix::BatchVector; size_type num_batch_entries = vals.size(); std::vector num_rows(num_batch_entries); std::vector> sizes(num_batch_entries); @@ -753,7 +712,7 @@ std::unique_ptr batch_initialize( } auto b_size = batch_dim<2>(sizes); auto b_stride = batch_stride(stride); - auto tmp = batch_dense::create(exec->get_master(), b_size, b_stride); + auto tmp = batch_vector::create(exec->get_master(), b_size, b_stride); size_type batch = 0; for (const auto& b : vals) { size_type idx = 0; @@ -772,7 +731,7 @@ std::unique_ptr batch_initialize( * Creates and initializes a batch of column-vectors. * * This function first creates a temporary Dense matrix, fills it with passed in - * values, and then converts the matrix to the requested type. The stride of + * values, and then converts the vector to the requested type. The stride of * the intermediate Dense matrix is set to 1. * * @tparam Matrix matrix type to initialize @@ -805,7 +764,7 @@ std::unique_ptr batch_initialize( * Creates and initializes a batch of matrices. * * This function first creates a temporary Dense matrix, fills it with passed in - * values, and then converts the matrix to the requested type. + * values, and then converts the vector to the requested type. * * @tparam Matrix matrix type to initialize * (Dense has to implement the ConvertibleTo interface) @@ -813,8 +772,8 @@ std::unique_ptr batch_initialize( * (not including the implied Executor as the first argument) * * @param stride row stride for the temporary Dense matrix - * @param vals values used to initialize the matrix - * @param exec Executor associated to the matrix + * @param vals values used to initialize the vector + * @param exec Executor associated to the vector * @param create_args additional arguments passed to Matrix::create, not * including the Executor, which is passed as the first * argument @@ -830,7 +789,7 @@ std::unique_ptr batch_initialize( vals, std::shared_ptr exec, TArgs&&... create_args) { - using batch_dense = matrix::BatchDense; + using batch_vector = matrix::BatchVector; size_type num_batch_entries = vals.size(); std::vector num_rows(num_batch_entries); std::vector num_cols(num_batch_entries); @@ -844,7 +803,7 @@ std::unique_ptr batch_initialize( } auto b_size = batch_dim<2>(sizes); auto b_stride = batch_stride(stride); - auto tmp = batch_dense::create(exec->get_master(), b_size, b_stride); + auto tmp = batch_vector::create(exec->get_master(), b_size, b_stride); size_type batch = 0; for (const auto& b : vals) { size_type ridx = 0; @@ -868,7 +827,7 @@ std::unique_ptr batch_initialize( * Creates and initializes a batch of matrices. * * This function first creates a temporary Dense matrix, fills it with passed in - * values, and then converts the matrix to the requested type. The stride of + * values, and then converts the vector to the requested type. The stride of * the intermediate Dense matrix is set to the number of columns of the * initializer list. 
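 *
 * A minimal sketch (illustrative only; assumes an executor handle `exec`):
 * two batch entries, each a 2x2 matrix, can be set up as
 *
 *     auto m = gko::batch_initialize<gko::matrix::BatchVector<double>>(
 *         {{{1.0, 2.0}, {3.0, 4.0}}, {{5.0, 6.0}, {7.0, 8.0}}}, exec);
 *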
* @@ -877,8 +836,8 @@ std::unique_ptr batch_initialize( * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param vals values used to initialize the matrix - * @param exec Executor associated to the matrix + * @param vals values used to initialize the vector + * @param exec Executor associated to the vector * @param create_args additional arguments passed to Matrix::create, not * including the Executor, which is passed as the first * argument @@ -909,7 +868,7 @@ std::unique_ptr batch_initialize( * input column vector. * * This function first creates a temporary batch dense matrix, fills it with - * passed in values, and then converts the matrix to the requested type. + * passed in values, and then converts the vector to the requested type. * * @tparam Matrix matrix type to initialize * (Dense has to implement the ConvertibleTo @@ -935,7 +894,7 @@ std::unique_ptr batch_initialize( std::initializer_list vals, std::shared_ptr exec, TArgs&&... create_args) { - using batch_dense = matrix::BatchDense; + using batch_vector = matrix::BatchVector; std::vector num_rows(num_vectors); std::vector> sizes(num_vectors); for (size_type b = 0; b < num_vectors; ++b) { @@ -944,7 +903,7 @@ std::unique_ptr batch_initialize( } auto b_size = batch_dim<2>(sizes); auto b_stride = batch_stride(stride); - auto tmp = batch_dense::create(exec->get_master(), b_size, b_stride); + auto tmp = batch_vector::create(exec->get_master(), b_size, b_stride); for (size_type batch = 0; batch < num_vectors; batch++) { size_type idx = 0; for (const auto& elem : vals) { @@ -962,7 +921,7 @@ std::unique_ptr batch_initialize( * Creates and initializes a column-vector from copies of a given vector. * * This function first creates a temporary Dense matrix, fills it with passed - * in values, and then converts the matrix to the requested type. The stride of + * in values, and then converts the vector to the requested type. The stride of * the intermediate Dense matrix is set to 1. * * @tparam Matrix matrix type to initialize @@ -997,7 +956,7 @@ std::unique_ptr batch_initialize( * Creates and initializes a matrix from copies of a given matrix. * * This function first creates a temporary batch dense matrix, fills it with - * passed in values, and then converts the matrix to the requested type. + * passed in values, and then converts the vector to the requested type. * * @tparam Matrix matrix type to initialize * (Dense has to implement the ConvertibleTo interface) @@ -1023,7 +982,7 @@ std::unique_ptr batch_initialize( vals, std::shared_ptr exec, TArgs&&... create_args) { - using batch_dense = matrix::BatchDense; + using batch_vector = matrix::BatchVector; std::vector> sizes(num_matrices); const size_type num_rows = vals.size(); for (size_type b = 0; b < num_matrices; ++b) { @@ -1033,7 +992,7 @@ std::unique_ptr batch_initialize( GKO_ASSERT(blockit->size() == num_cols); } } - auto tmp = batch_dense::create(exec->get_master(), sizes, stride); + auto tmp = batch_vector::create(exec->get_master(), sizes, stride); for (size_type batch = 0; batch < num_matrices; batch++) { size_type ridx = 0; for (const auto& row : vals) { @@ -1054,7 +1013,7 @@ std::unique_ptr batch_initialize( * Creates and initializes a matrix from copies of a given matrix. * * This function first creates a temporary Dense matrix, fills it with passed in - * values, and then converts the matrix to the requested type. The stride of + * values, and then converts the vector to the requested type. 
The stride of * the intermediate Dense matrix is set to 1. * * @tparam Matrix matrix type to initialize @@ -1090,4 +1049,4 @@ std::unique_ptr batch_initialize( } // namespace gko -#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ +#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_VECTOR_HPP_ diff --git a/omp/matrix/batch_vector_kernels.cpp b/omp/matrix/batch_vector_kernels.cpp index 70c0794f4a8..7ade2fcca23 100644 --- a/omp/matrix/batch_vector_kernels.cpp +++ b/omp/matrix/batch_vector_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_vector_kernels.hpp" #include @@ -39,8 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include -#include #include "core/components/prefix_sum_kernels.hpp" @@ -51,70 +49,20 @@ namespace gko { namespace kernels { namespace omp { /** - * @brief The BatchDense matrix format namespace. - * @ref BatchDense - * @ingroup batch_dense + * @brief The BatchVector matrix format namespace. + * @ref BatchVector + * @ingroup batch_vector */ -namespace batch_dense { +namespace batch_vector { -#include "reference/matrix/batch_dense_kernels.hpp.inc" - - -template -void simple_apply(std::shared_ptr exec, - const matrix::BatchDense* const a, - const matrix::BatchDense* const b, - matrix::BatchDense* const c) -{ - const auto a_ub = host::get_batch_struct(a); - const auto b_ub = host::get_batch_struct(b); - const auto c_ub = host::get_batch_struct(c); -#pragma omp parallel for - for (size_type batch = 0; batch < c->get_num_batch_entries(); ++batch) { - const auto a_b = gko::batch::batch_entry(a_ub, batch); - const auto b_b = gko::batch::batch_entry(b_ub, batch); - const auto c_b = gko::batch::batch_entry(c_ub, batch); - matvec_kernel(a_b, b_b, c_b); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); - - -template -void apply(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const a, - const matrix::BatchDense* const b, - const matrix::BatchDense* const beta, - matrix::BatchDense* const c) -{ - const auto a_ub = host::get_batch_struct(a); - const auto b_ub = host::get_batch_struct(b); - const auto c_ub = host::get_batch_struct(c); - const auto alpha_ub = host::get_batch_struct(alpha); - const auto beta_ub = host::get_batch_struct(beta); -#pragma omp parallel for - for (size_type batch = 0; batch < c->get_num_batch_entries(); ++batch) { - const auto a_b = gko::batch::batch_entry(a_ub, batch); - const auto b_b = gko::batch::batch_entry(b_ub, batch); - const auto c_b = gko::batch::batch_entry(c_ub, batch); - const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); - const auto beta_b = gko::batch::batch_entry(beta_ub, batch); - advanced_matvec_kernel(alpha_b.values[0], a_b, b_b, beta_b.values[0], - c_b); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL); +#include "reference/matrix/batch_vector_kernels.hpp.inc" template void scale(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - matrix::BatchDense* const x) + const matrix::BatchVector* const alpha, + matrix::BatchVector* const x) { const auto x_ub = host::get_batch_struct(x); const auto alpha_ub = host::get_batch_struct(alpha); @@ -126,14 +74,14 @@ void scale(std::shared_ptr exec, } } 
-GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL); template void add_scaled(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const x, - matrix::BatchDense* const y) + const matrix::BatchVector* const alpha, + const matrix::BatchVector* const x, + matrix::BatchVector* const y) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -147,71 +95,14 @@ void add_scaled(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL); - - -template -void add_scale(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const x, - const matrix::BatchDense* const beta, - matrix::BatchDense* const y) -{ - const auto x_ub = host::get_batch_struct(x); - const auto y_ub = host::get_batch_struct(y); - const auto alpha_ub = host::get_batch_struct(alpha); - const auto beta_ub = host::get_batch_struct(beta); -#pragma omp parallel for - for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { - const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); - const auto beta_b = gko::batch::batch_entry(beta_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); - add_scale(alpha_b, x_b, beta_b, y_b); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL); - - -template -void convergence_add_scaled(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const x, - matrix::BatchDense* const y, - const uint32& converged) -{ - const auto x_ub = host::get_batch_struct(x); - const auto y_ub = host::get_batch_struct(y); - const auto alpha_ub = host::get_batch_struct(alpha); -#pragma omp parallel for - for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { - const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); - add_scaled(alpha_b, x_b, y_b, converged); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL); - - -template -void add_scaled_diag(std::shared_ptr, - const matrix::BatchDense*, - const matrix::Diagonal*, - matrix::BatchDense*) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::BatchDense* const x, - const matrix::BatchDense* const y, - matrix::BatchDense* const result) + const matrix::BatchVector* const x, + const matrix::BatchVector* const y, + matrix::BatchVector* const result) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -226,37 +117,14 @@ void compute_dot(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL); - - -template -void convergence_compute_dot(std::shared_ptr exec, - const matrix::BatchDense* const x, - const matrix::BatchDense* const y, - matrix::BatchDense* const result, - const uint32& converged) -{ - const auto x_ub = host::get_batch_struct(x); - const auto y_ub = host::get_batch_struct(y); - const auto res_ub = host::get_batch_struct(result); -#pragma omp parallel for - for (size_type 
batch = 0; batch < result->get_num_batch_entries(); - ++batch) { - const auto res_b = gko::batch::batch_entry(res_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); - compute_dot_product(x_b, y_b, res_b, converged); - } -} - GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL); + GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::BatchDense* const x, - matrix::BatchDense>* const result) + const matrix::BatchVector* const x, + matrix::BatchVector>* const result) { const auto x_ub = host::get_batch_struct(x); const auto res_ub = host::get_batch_struct(result); @@ -270,261 +138,13 @@ void compute_norm2(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL); - - -template -void convergence_compute_norm2( - std::shared_ptr exec, - const matrix::BatchDense* const x, - matrix::BatchDense>* const result, - const uint32& converged) -{ - const auto x_ub = host::get_batch_struct(x); - const auto res_ub = host::get_batch_struct(result); -#pragma omp parallel for - for (size_type batch = 0; batch < result->get_num_batch_entries(); - ++batch) { - const auto res_b = gko::batch::batch_entry(res_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - compute_norm2(x_b, res_b, converged); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL); - - -template -void convert_to_batch_csr(std::shared_ptr exec, - const matrix::BatchDense* const source, - matrix::BatchCsr* const result) -{ - GKO_ASSERT(source->get_size().stores_equal_sizes() == true); - auto num_rows = result->get_size().at(0)[0]; - auto num_cols = result->get_size().at(0)[1]; - auto num_batches = result->get_num_batch_entries(); - - auto row_ptrs = result->get_row_ptrs(); - auto col_idxs = result->get_col_idxs(); - auto values = result->get_values(); - - -#pragma omp parallel for - for (size_type row = 0; row < num_rows; ++row) { - IndexType row_nnz{}; - for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(0, row, col); - row_nnz += static_cast(val != zero()); - } - row_ptrs[row] = row_nnz; - } - - components::prefix_sum(exec, row_ptrs, num_rows + 1); - -#pragma omp parallel for - for (size_type row = 0; row < num_rows; ++row) { - auto cur_ptr = row_ptrs[row]; - for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(0, row, col); - if (val != zero()) { - col_idxs[cur_ptr] = static_cast(col); - ++cur_ptr; - } - } - } - -#pragma omp parallel for - for (size_type batch = 0; batch < num_batches; ++batch) { - size_type cur_ptr = - batch * row_ptrs[num_rows]; // as row_ptrs[num_rows] is the num of - // non zero elements in the matrix - for (size_type row = 0; row < num_rows; ++row) { - for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(batch, row, col); - if (val != zero()) { - values[cur_ptr] = val; - ++cur_ptr; - } - } - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_AND_INT32_INDEX( - GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL); - - -template -void count_nonzeros(std::shared_ptr exec, - const matrix::BatchDense* const source, - size_type* const result) -{ -#pragma omp parallel for - for (size_type batch = 0; batch < source->get_num_batch_entries(); - ++batch) { - auto num_rows = source->get_size().at(batch)[0]; - auto num_cols = source->get_size().at(batch)[1]; - size_type num_nonzeros = 
0; - - for (size_type row = 0; row < num_rows; ++row) { - for (size_type col = 0; col < num_cols; ++col) { - num_nonzeros += static_cast( - source->at(batch, row, col) != zero()); - } - } - result[batch] = num_nonzeros; - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL); - - -template -void calculate_max_nnz_per_row( - std::shared_ptr, - const matrix::BatchDense* const source, size_type* const result) -{ -#pragma omp parallel for - for (size_type batch = 0; batch < source->get_num_batch_entries(); - ++batch) { - auto num_rows = source->get_size().at(batch)[0]; - auto num_cols = source->get_size().at(batch)[1]; - size_type num_stored_elements_per_row = 0; - size_type num_nonzeros = 0; - - for (size_type row = 0; row < num_rows; ++row) { - num_nonzeros = 0; - for (size_type col = 0; col < num_cols; ++col) { - num_nonzeros += static_cast( - source->at(batch, row, col) != zero()); - } - num_stored_elements_per_row = - std::max(num_nonzeros, num_stored_elements_per_row); - } - result[batch] = num_stored_elements_per_row; - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); - - -template -void calculate_nonzeros_per_row( - std::shared_ptr, - const matrix::BatchDense* const source, - array* const result) -{ - size_type cumul_prev_rows = 0; - for (size_type batch = 0; batch < source->get_num_batch_entries(); - ++batch) { - auto num_rows = source->get_size().at(batch)[0]; - auto num_cols = source->get_size().at(batch)[1]; - auto row_nnz_val = result->get_data() + cumul_prev_rows; - -#pragma omp parallel for reduction(+ : cumul_prev_rows) - for (size_type row = 0; row < num_rows; ++row) { - size_type num_nonzeros = 0; - - for (size_type col = 0; col < num_cols; ++col) { - num_nonzeros += static_cast( - source->at(batch, row, col) != zero()); - } - row_nnz_val[row] = num_nonzeros; - ++cumul_prev_rows; - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL); - - -template -void calculate_total_cols(std::shared_ptr, - const matrix::BatchDense* const source, - size_type* const result, - const size_type* const stride_factor, - const size_type* const slice_size) -{ -#pragma omp parallel for - for (size_type batch = 0; batch < source->get_num_batch_entries(); - ++batch) { - auto num_rows = source->get_size().at(batch)[0]; - auto num_cols = source->get_size().at(batch)[1]; - auto slice_num = ceildiv(num_rows, slice_size[batch]); - size_type total_cols = 0; - size_type temp = 0; - size_type slice_temp = 0; - - for (size_type slice = 0; slice < slice_num; slice++) { - slice_temp = 0; - for (size_type row = 0; row < slice_size[batch] && - row + slice * slice_size[batch] < num_rows; - row++) { - temp = 0; - for (size_type col = 0; col < num_cols; col++) { - temp += static_cast( - source->at(batch, row + slice * slice_size[batch], - col) != zero()); - } - slice_temp = (slice_temp < temp) ? 
temp : slice_temp; - } - slice_temp = ceildiv(slice_temp, stride_factor[batch]) * - stride_factor[batch]; - total_cols += slice_temp; - } - result[batch] = total_cols; - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL); - - -template -void transpose(std::shared_ptr, - const matrix::BatchDense* const orig, - matrix::BatchDense* const trans) -{ -#pragma omp parallel for - for (size_type batch = 0; batch < orig->get_num_batch_entries(); ++batch) { - for (size_type i = 0; i < orig->get_size().at(batch)[0]; ++i) { - for (size_type j = 0; j < orig->get_size().at(batch)[1]; ++j) { - trans->at(batch, j, i) = orig->at(batch, i, j); - } - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL); - - -template -void conj_transpose(std::shared_ptr, - const matrix::BatchDense* const orig, - matrix::BatchDense* const trans) -{ -#pragma omp parallel for - for (size_type batch = 0; batch < orig->get_num_batch_entries(); ++batch) { - for (size_type i = 0; i < orig->get_size().at(batch)[0]; ++i) { - for (size_type j = 0; j < orig->get_size().at(batch)[1]; ++j) { - trans->at(batch, j, i) = conj(orig->at(batch, i, j)); - } - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL); + GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL); template void copy(std::shared_ptr exec, - const matrix::BatchDense* x, - matrix::BatchDense* result) + const matrix::BatchVector* x, + matrix::BatchVector* result) { const auto x_ub = host::get_batch_struct(x); const auto result_ub = host::get_batch_struct(result); @@ -536,79 +156,10 @@ void copy(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COPY_KERNEL); - - -template -void convergence_copy(std::shared_ptr exec, - const matrix::BatchDense* x, - matrix::BatchDense* result, - const uint32& converged) -{ - const auto x_ub = host::get_batch_struct(x); - const auto result_ub = host::get_batch_struct(result); -#pragma omp parallel for - for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { - const auto result_b = gko::batch::batch_entry(result_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - copy(x_b, result_b, converged); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL); - - -template -void batch_scale(std::shared_ptr exec, - const matrix::BatchDiagonal* const left, - const matrix::BatchDiagonal* const rght, - matrix::BatchDense* const vecs) -{ - const auto left_vals = left->get_const_values(); - const auto rght_vals = rght->get_const_values(); - const auto v_vals = vecs->get_values(); - const auto nrows = static_cast(vecs->get_size().at(0)[0]); - const auto ncols = static_cast(vecs->get_size().at(0)[1]); - const auto vstride = vecs->get_stride().at(0); -#pragma omp parallel for - for (size_type batch = 0; batch < vecs->get_num_batch_entries(); ++batch) { - const auto left_b = - gko::batch::batch_entry_ptr(left_vals, 1, nrows, batch); - const auto rght_b = - gko::batch::batch_entry_ptr(rght_vals, 1, ncols, batch); - const auto v_b = - gko::batch::batch_entry_ptr(v_vals, vstride, nrows, batch); - batch_scale(nrows, ncols, vstride, left_b, rght_b, v_b); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL); - - -template -void add_scaled_identity(std::shared_ptr exec, - const matrix::BatchDense* const a, - const matrix::BatchDense* const b, - matrix::BatchDense* const mtx) -{ - const auto a_ub = 
host::get_batch_struct(a); - const auto b_ub = host::get_batch_struct(b); - const auto mtx_ub = host::get_batch_struct(mtx); -#pragma omp parallel for - for (size_type batch = 0; batch < mtx->get_num_batch_entries(); ++batch) { - auto a_b = gko::batch::batch_entry(a_ub, batch); - auto b_b = gko::batch::batch_entry(b_ub, batch); - auto mtx_b = gko::batch::batch_entry(mtx_ub, batch); - add_scaled_identity(a_b.values[0], b_b.values[0], mtx_b); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL); -} // namespace batch_dense +} // namespace batch_vector } // namespace omp } // namespace kernels } // namespace gko diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp new file mode 100644 index 00000000000..0c07956d9d6 --- /dev/null +++ b/reference/matrix/batch_struct.hpp @@ -0,0 +1,120 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ + + +#include "core/matrix/batch_struct.hpp" + + +#include +#include + + +namespace gko { +namespace kernels { +/** + * @brief A namespace for shared functionality between omp and reference + * executors. + */ +namespace host { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices. 
+ */ +template +inline gko::batch_vector::UniformBatch get_batch_struct( + const matrix::BatchVector* const op) +{ + return { + op->get_const_values(), + op->get_num_batch_entries(), + op->get_stride().at(0), + static_cast(op->get_size().at(0)[0]), + static_cast(op->get_size().at(0)[1]), + static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of dense matrices. + */ +template +inline gko::batch_vector::UniformBatch get_batch_struct( + matrix::BatchVector* const op) +{ + return { + op->get_values(), + op->get_num_batch_entries(), + op->get_stride().at(0), + static_cast(op->get_size().at(0)[0]), + static_cast(op->get_size().at(0)[1]), + static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; +} + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices + * that may be null. + */ +template +inline gko::batch_vector::UniformBatch maybe_null_batch_struct( + const matrix::BatchVector* const op) +{ + if (op) { + return {op->get_const_values(), op->get_num_batch_entries(), + op->get_stride().at(0), + static_cast(op->get_size().at(0)[0]), + static_cast(op->get_size().at(0)[1])}; + } else { + return {nullptr, 0, 0, 0, 0}; + } +} + + +} // namespace host +} // namespace kernels +} // namespace gko + + +#endif // GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ diff --git a/reference/matrix/batch_vector_kernels.cpp b/reference/matrix/batch_vector_kernels.cpp index 8e9e857cc5b..01748c6e524 100644 --- a/reference/matrix/batch_vector_kernels.cpp +++ b/reference/matrix/batch_vector_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_vector_kernels.hpp" #include @@ -39,8 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include -#include #include "core/matrix/batch_struct.hpp" @@ -51,68 +49,20 @@ namespace gko { namespace kernels { namespace reference { /** - * @brief The BatchDense matrix format namespace. - * @ref BatchDense - * @ingroup batch_dense + * @brief The BatchVector matrix format namespace. 
+ * @ref BatchVector + * @ingroup batch_vector */ -namespace batch_dense { +namespace batch_vector { -#include "reference/matrix/batch_dense_kernels.hpp.inc" +#include "reference/matrix/batch_vector_kernels.hpp.inc" template -void simple_apply(std::shared_ptr exec, - const matrix::BatchDense* const a, - const matrix::BatchDense* const b, - matrix::BatchDense* const c) -{ - const auto a_ub = host::get_batch_struct(a); - const auto b_ub = host::get_batch_struct(b); - const auto c_ub = host::get_batch_struct(c); - for (size_type batch = 0; batch < c->get_num_batch_entries(); ++batch) { - const auto a_b = gko::batch::batch_entry(a_ub, batch); - const auto b_b = gko::batch::batch_entry(b_ub, batch); - const auto c_b = gko::batch::batch_entry(c_ub, batch); - matvec_kernel(a_b, b_b, c_b); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); - - -template -void apply(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const a, - const matrix::BatchDense* const b, - const matrix::BatchDense* const beta, - matrix::BatchDense* const c) -{ - const auto a_ub = host::get_batch_struct(a); - const auto b_ub = host::get_batch_struct(b); - const auto c_ub = host::get_batch_struct(c); - const auto alpha_ub = host::get_batch_struct(alpha); - const auto beta_ub = host::get_batch_struct(beta); - for (size_type batch = 0; batch < c->get_num_batch_entries(); ++batch) { - const auto a_b = gko::batch::batch_entry(a_ub, batch); - const auto b_b = gko::batch::batch_entry(b_ub, batch); - const auto c_b = gko::batch::batch_entry(c_ub, batch); - const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); - const auto beta_b = gko::batch::batch_entry(beta_ub, batch); - advanced_matvec_kernel(alpha_b.values[0], a_b, b_b, beta_b.values[0], - c_b); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_APPLY_KERNEL); - - -template -void scale(std::shared_ptr exec, - const matrix::BatchDense* alpha, - matrix::BatchDense* x) +void scale(std::shared_ptr exec, + const matrix::BatchVector* alpha, + matrix::BatchVector* x) { const auto x_ub = host::get_batch_struct(x); const auto alpha_ub = host::get_batch_struct(alpha); @@ -123,14 +73,14 @@ void scale(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SCALE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL); template -void add_scaled(std::shared_ptr exec, - const matrix::BatchDense* alpha, - const matrix::BatchDense* x, - matrix::BatchDense* y) +void add_scaled(std::shared_ptr exec, + const matrix::BatchVector* alpha, + const matrix::BatchVector* x, + matrix::BatchVector* y) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -143,77 +93,14 @@ void add_scaled(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALED_KERNEL); - - -template -void add_scale(std::shared_ptr exec, - const matrix::BatchDense* const alpha, - const matrix::BatchDense* const x, - const matrix::BatchDense* const beta, - matrix::BatchDense* const y) -{ - const auto x_ub = host::get_batch_struct(x); - const auto y_ub = host::get_batch_struct(y); - const auto alpha_ub = host::get_batch_struct(alpha); - const auto beta_ub = host::get_batch_struct(beta); - for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { - const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); - const auto beta_b = gko::batch::batch_entry(beta_ub, batch); - const 
auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); - add_scale(alpha_b, x_b, beta_b, y_b); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADD_SCALE_KERNEL); - - -template -void convergence_add_scaled(std::shared_ptr exec, - const matrix::BatchDense* alpha, - const matrix::BatchDense* x, - matrix::BatchDense* y, - const uint32& converged) -{ - const auto x_ub = host::get_batch_struct(x); - const auto y_ub = host::get_batch_struct(y); - const auto alpha_ub = host::get_batch_struct(alpha); - for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { - const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); - add_scaled(alpha_b, x_b, y_b, converged); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_ADD_SCALED_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL); template -void add_scaled_diag(std::shared_ptr exec, - const matrix::BatchDense* alpha, - const matrix::Diagonal* x, - matrix::BatchDense* y) GKO_NOT_IMPLEMENTED; -// { -// for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { -// const auto diag_values = x->get_const_values(); -// for (size_type i = 0; i < x->get_size().at(batch)[0]; i++) { -// y->at(batch,i, i) += alpha->at(batch,0, 0) * diag_values[i]; -// } -// } -// } - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_DIAG_KERNEL); - - -template -void compute_dot(std::shared_ptr exec, - const matrix::BatchDense* x, - const matrix::BatchDense* y, - matrix::BatchDense* result) +void compute_dot(std::shared_ptr exec, + const matrix::BatchVector* x, + const matrix::BatchVector* y, + matrix::BatchVector* result) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -227,36 +114,14 @@ void compute_dot(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COMPUTE_DOT_KERNEL); - - -template -void convergence_compute_dot(std::shared_ptr exec, - const matrix::BatchDense* x, - const matrix::BatchDense* y, - matrix::BatchDense* result, - const uint32& converged) -{ - const auto x_ub = host::get_batch_struct(x); - const auto y_ub = host::get_batch_struct(y); - const auto res_ub = host::get_batch_struct(result); - for (size_type batch = 0; batch < result->get_num_batch_entries(); - ++batch) { - const auto res_b = gko::batch::batch_entry(res_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); - compute_dot_product(x_b, y_b, res_b, converged); - } -} - GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_DOT_KERNEL); + GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL); template -void compute_norm2(std::shared_ptr exec, - const matrix::BatchDense* x, - matrix::BatchDense>* result) +void compute_norm2(std::shared_ptr exec, + const matrix::BatchVector* x, + matrix::BatchVector>* result) { const auto x_ub = host::get_batch_struct(x); const auto res_ub = host::get_batch_struct(result); @@ -269,232 +134,13 @@ void compute_norm2(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_COMPUTE_NORM2_KERNEL); - - -template -void convergence_compute_norm2( - std::shared_ptr exec, - const matrix::BatchDense* x, - matrix::BatchDense>* result, - const uint32& converged) -{ - const auto 
x_ub = host::get_batch_struct(x); - const auto res_ub = host::get_batch_struct(result); - for (size_type batch = 0; batch < result->get_num_batch_entries(); - ++batch) { - const auto res_b = gko::batch::batch_entry(res_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - compute_norm2(x_b, res_b, converged); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COMPUTE_NORM2_KERNEL); - - -template -void convert_to_batch_csr(std::shared_ptr exec, - const matrix::BatchDense* source, - matrix::BatchCsr* result) -{ - GKO_ASSERT(source->get_size().stores_equal_sizes() == true); - auto num_rows = result->get_size().at(0)[0]; - auto num_cols = result->get_size().at(0)[1]; - auto num_batch_entries = result->get_num_batch_entries(); - - auto row_ptrs = result->get_row_ptrs(); - auto col_idxs = result->get_col_idxs(); - auto values = result->get_values(); - - size_type cur_ptr = 0; - row_ptrs[0] = cur_ptr; - for (size_type row = 0; row < num_rows; ++row) { - for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(0, row, col); - if (val != zero()) { - col_idxs[cur_ptr] = col; - ++cur_ptr; - } - } - row_ptrs[row + 1] = cur_ptr; - } - - cur_ptr = 0; - for (size_type batch = 0; batch < num_batch_entries; ++batch) { - for (size_type row = 0; row < num_rows; ++row) { - for (size_type col = 0; col < num_cols; ++col) { - auto val = source->at(batch, row, col); - if (val != zero()) { - values[cur_ptr] = val; - ++cur_ptr; - } - } - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_AND_INT32_INDEX( - GKO_DECLARE_BATCH_DENSE_CONVERT_TO_BATCH_CSR_KERNEL); - - -template -void count_nonzeros(std::shared_ptr exec, - const matrix::BatchDense* source, - size_type* result) -{ - for (size_type batch = 0; batch < source->get_num_batch_entries(); - ++batch) { - auto num_rows = source->get_size().at(batch)[0]; - auto num_cols = source->get_size().at(batch)[1]; - auto num_nonzeros = 0; - - for (size_type row = 0; row < num_rows; ++row) { - for (size_type col = 0; col < num_cols; ++col) { - num_nonzeros += - (source->at(batch, row, col) != zero()); - } - } - result[batch] = num_nonzeros; - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_COUNT_NONZEROS_KERNEL); - - -template -void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::BatchDense* source, - size_type* result) -{ - for (size_type batch = 0; batch < source->get_num_batch_entries(); - ++batch) { - auto num_rows = source->get_size().at(batch)[0]; - auto num_cols = source->get_size().at(batch)[1]; - size_type num_stored_elements_per_row = 0; - size_type num_nonzeros = 0; - for (size_type row = 0; row < num_rows; ++row) { - num_nonzeros = 0; - for (size_type col = 0; col < num_cols; ++col) { - num_nonzeros += - (source->at(batch, row, col) != zero()); - } - num_stored_elements_per_row = - std::max(num_nonzeros, num_stored_elements_per_row); - } - result[batch] = num_stored_elements_per_row; - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); - - -template -void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::BatchDense* source, - array* result) -{ - for (size_type batch = 0; batch < source->get_num_batch_entries(); - ++batch) { - auto num_rows = source->get_size().at(batch)[0]; - auto num_cols = source->get_size().at(batch)[1]; - auto row_nnz_val = result->get_data(); - size_type offset = 0; - for (size_type row = 0; row < num_rows; ++row) { - size_type num_nonzeros = 0; - for (size_type col = 0; 
col < num_cols; ++col) { - num_nonzeros += - (source->at(batch, row, col) != zero()); - } - row_nnz_val[offset + row] = num_nonzeros; - ++offset; - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL); - - -template -void calculate_total_cols(std::shared_ptr exec, - const matrix::BatchDense* const source, - size_type* const result, - const size_type* const stride_factor, - const size_type* const slice_size) -{ - for (size_type batch = 0; batch < source->get_num_batch_entries(); - ++batch) { - auto num_rows = source->get_size().at(batch)[0]; - auto num_cols = source->get_size().at(batch)[1]; - auto slice_num = ceildiv(num_rows, slice_size[batch]); - auto total_cols = 0; - auto temp = 0, slice_temp = 0; - for (size_type slice = 0; slice < slice_num; slice++) { - slice_temp = 0; - for (size_type row = 0; row < slice_size[batch] && - row + slice * slice_size[batch] < num_rows; - row++) { - temp = 0; - for (size_type col = 0; col < num_cols; col++) { - temp += (source->at(batch, row + slice * slice_size[batch], - col) != zero()); - } - slice_temp = (slice_temp < temp) ? temp : slice_temp; - } - slice_temp = ceildiv(slice_temp, stride_factor[batch]) * - stride_factor[batch]; - total_cols += slice_temp; - } - result[batch] = total_cols; - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CALCULATE_TOTAL_COLS_KERNEL); - - -template -void transpose(std::shared_ptr exec, - const matrix::BatchDense* const orig, - matrix::BatchDense* const trans) -{ - for (size_type batch = 0; batch < orig->get_num_batch_entries(); ++batch) { - for (size_type i = 0; i < orig->get_size().at(batch)[0]; ++i) { - for (size_type j = 0; j < orig->get_size().at(batch)[1]; ++j) { - trans->at(batch, j, i) = orig->at(batch, i, j); - } - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_TRANSPOSE_KERNEL); - - -template -void conj_transpose(std::shared_ptr exec, - const matrix::BatchDense* orig, - matrix::BatchDense* trans) -{ - for (size_type batch = 0; batch < orig->get_num_batch_entries(); ++batch) { - for (size_type i = 0; i < orig->get_size().at(batch)[0]; ++i) { - for (size_type j = 0; j < orig->get_size().at(batch)[1]; ++j) { - trans->at(batch, j, i) = conj(orig->at(batch, i, j)); - } - } - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONJ_TRANSPOSE_KERNEL); + GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL); template void copy(std::shared_ptr exec, - const matrix::BatchDense* x, - matrix::BatchDense* result) + const matrix::BatchVector* x, + matrix::BatchVector* result) { const auto x_ub = host::get_batch_struct(x); const auto result_ub = host::get_batch_struct(result); @@ -505,76 +151,10 @@ void copy(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_COPY_KERNEL); - - -template -void convergence_copy(std::shared_ptr exec, - const matrix::BatchDense* x, - matrix::BatchDense* result, - const uint32& converged) -{ - const auto x_ub = host::get_batch_struct(x); - const auto result_ub = host::get_batch_struct(result); - for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { - const auto result_b = gko::batch::batch_entry(result_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - copy(x_b, result_b, converged); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_CONVERGENCE_COPY_KERNEL); - - -template -void batch_scale(std::shared_ptr exec, - const matrix::BatchDiagonal* const left, - const matrix::BatchDiagonal* 
const rght, - matrix::BatchDense* const vecs) -{ - const auto left_vals = left->get_const_values(); - const auto rght_vals = rght->get_const_values(); - const auto v_vals = vecs->get_values(); - const auto nrows = static_cast(vecs->get_size().at(0)[0]); - const auto ncols = static_cast(vecs->get_size().at(0)[1]); - const auto vstride = vecs->get_stride().at(0); - for (size_type batch = 0; batch < vecs->get_num_batch_entries(); ++batch) { - const auto left_b = - gko::batch::batch_entry_ptr(left_vals, 1, nrows, batch); - const auto rght_b = - gko::batch::batch_entry_ptr(rght_vals, 1, ncols, batch); - const auto v_b = - gko::batch::batch_entry_ptr(v_vals, vstride, nrows, batch); - batch_scale(nrows, ncols, vstride, left_b, rght_b, v_b); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_BATCH_SCALE_KERNEL); - - -template -void add_scaled_identity(std::shared_ptr exec, - const matrix::BatchDense* const a, - const matrix::BatchDense* const b, - matrix::BatchDense* const mtx) -{ - const auto a_ub = host::get_batch_struct(a); - const auto b_ub = host::get_batch_struct(b); - const auto mtx_ub = host::get_batch_struct(mtx); - for (size_type batch = 0; batch < mtx->get_num_batch_entries(); ++batch) { - auto a_b = gko::batch::batch_entry(a_ub, batch); - auto b_b = gko::batch::batch_entry(b_ub, batch); - auto mtx_b = gko::batch::batch_entry(mtx_ub, batch); - add_scaled_identity(a_b.values[0], b_b.values[0], mtx_b); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADD_SCALED_IDENTITY_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL); -} // namespace batch_dense +} // namespace batch_vector } // namespace reference } // namespace kernels } // namespace gko diff --git a/reference/matrix/batch_vector_kernels.hpp.inc b/reference/matrix/batch_vector_kernels.hpp.inc index db828206239..eb4a8cfab2a 100644 --- a/reference/matrix/batch_vector_kernels.hpp.inc +++ b/reference/matrix/batch_vector_kernels.hpp.inc @@ -32,9 +32,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
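// The per-entry kernels in the hunk that follows all operate on a small
// BatchEntry-style view (a values pointer plus stride, num_rows and num_rhs).
// As a rough, self-contained sketch of the layout these kernels appear to
// assume (row-major entries stored back to back); the names batch_entry_view,
// entry_at and uniform_batch_entry are illustrative only, not Ginkgo types:

#include <vector>

struct batch_entry_view {
    double* values;  // first element of this batch entry
    int stride;      // distance between consecutive rows
    int num_rows;
    int num_rhs;     // number of columns / right-hand sides
};

// Element (row, col) of one entry under the assumed row-major layout.
inline double& entry_at(const batch_entry_view& e, int row, int col)
{
    return e.values[row * e.stride + col];
}

// View onto entry `b` of a uniform batch stored contiguously, mirroring the
// batch_entry_ptr(values, stride, num_rows, batch) calls used above.
inline batch_entry_view uniform_batch_entry(std::vector<double>& batch_values,
                                            int stride, int num_rows,
                                            int num_rhs, int b)
{
    return {batch_values.data() + b * stride * num_rows, stride, num_rows,
            num_rhs};
}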
template inline void matvec_kernel( - const gko::batch_dense::BatchEntry& a, - const gko::batch_dense::BatchEntry& b, - const gko::batch_dense::BatchEntry& c) + const gko::batch_vector::BatchEntry& a, + const gko::batch_vector::BatchEntry& b, + const gko::batch_vector::BatchEntry& c) { for (int row = 0; row < c.num_rows; ++row) { for (int col = 0; col < c.num_rhs; ++col) { @@ -57,9 +57,9 @@ inline void matvec_kernel( template inline void advanced_matvec_kernel( const ValueType alpha, - const gko::batch_dense::BatchEntry& a, - const gko::batch_dense::BatchEntry& b, - const ValueType beta, const gko::batch_dense::BatchEntry& c) + const gko::batch_vector::BatchEntry& a, + const gko::batch_vector::BatchEntry& b, + const ValueType beta, const gko::batch_vector::BatchEntry& c) { if (beta != gko::zero()) { for (int row = 0; row < c.num_rows; ++row) { @@ -88,8 +88,8 @@ inline void advanced_matvec_kernel( template -inline void scale(const gko::batch_dense::BatchEntry& alpha, - const gko::batch_dense::BatchEntry& x) +inline void scale(const gko::batch_vector::BatchEntry& alpha, + const gko::batch_vector::BatchEntry& x) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -109,9 +109,9 @@ inline void scale(const gko::batch_dense::BatchEntry& alpha, template inline void add_scaled( - const gko::batch_dense::BatchEntry& alpha, - const gko::batch_dense::BatchEntry& x, - const gko::batch_dense::BatchEntry& y) + const gko::batch_vector::BatchEntry& alpha, + const gko::batch_vector::BatchEntry& x, + const gko::batch_vector::BatchEntry& y) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -133,10 +133,10 @@ inline void add_scaled( template inline void add_scale( - const gko::batch_dense::BatchEntry& alpha, - const gko::batch_dense::BatchEntry& x, - const gko::batch_dense::BatchEntry& beta, - const gko::batch_dense::BatchEntry& y) + const gko::batch_vector::BatchEntry& alpha, + const gko::batch_vector::BatchEntry& x, + const gko::batch_vector::BatchEntry& beta, + const gko::batch_vector::BatchEntry& y) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -160,8 +160,8 @@ inline void add_scale( template inline void compute_norm2( - const gko::batch_dense::BatchEntry& x, - const gko::batch_dense::BatchEntry>& result) + const gko::batch_vector::BatchEntry& x, + const gko::batch_vector::BatchEntry>& result) { for (int j = 0; j < x.num_rhs; ++j) { result.values[j] = gko::zero>(); @@ -185,8 +185,8 @@ inline void compute_norm2( */ template inline void batch_scale( - const gko::batch_dense::BatchEntry& diag_vec, - const gko::batch_dense::BatchEntry& a) + const gko::batch_vector::BatchEntry& diag_vec, + const gko::batch_vector::BatchEntry& a) { for (int i_row = 0; i_row < a.num_rows; i_row++) { const ValueType scale = diag_vec.values[i_row]; @@ -217,8 +217,8 @@ inline void batch_scale(const int nrows, const int ncols, * and stride set. 
*/ template -inline void copy(const gko::batch_dense::BatchEntry& in, - const gko::batch_dense::BatchEntry& out) +inline void copy(const gko::batch_vector::BatchEntry& in, + const gko::batch_vector::BatchEntry& out) { for (int iz = 0; iz < in.num_rows * in.num_rhs; iz++) { const int i = iz / in.num_rhs; @@ -230,9 +230,9 @@ inline void copy(const gko::batch_dense::BatchEntry& in, template inline void compute_dot_product( - const gko::batch_dense::BatchEntry& x, - const gko::batch_dense::BatchEntry& y, - const gko::batch_dense::BatchEntry& result) + const gko::batch_vector::BatchEntry& x, + const gko::batch_vector::BatchEntry& y, + const gko::batch_vector::BatchEntry& result) { for (int c = 0; c < result.num_rhs; c++) { result.values[c] = gko::zero(); @@ -249,8 +249,8 @@ inline void compute_dot_product( template inline void copy( - const gko::batch_dense::BatchEntry& source_entry, - const gko::batch_dense::BatchEntry& destination_entry, + const gko::batch_vector::BatchEntry& source_entry, + const gko::batch_vector::BatchEntry& destination_entry, const gko::uint32& converged) { for (int r = 0; r < source_entry.num_rows; r++) { @@ -270,9 +270,9 @@ inline void copy( template inline void add_scaled( - const gko::batch_dense::BatchEntry& alpha, - const gko::batch_dense::BatchEntry& x, - const gko::batch_dense::BatchEntry& y, + const gko::batch_vector::BatchEntry& alpha, + const gko::batch_vector::BatchEntry& x, + const gko::batch_vector::BatchEntry& y, const gko::uint32& converged) { if (alpha.num_rhs == 1) { @@ -308,8 +308,8 @@ inline void add_scaled( template inline void compute_norm2( - const gko::batch_dense::BatchEntry& x, - const gko::batch_dense::BatchEntry>& result, + const gko::batch_vector::BatchEntry& x, + const gko::batch_vector::BatchEntry>& result, const gko::uint32& converged) { for (int j = 0; j < x.num_rhs; ++j) { @@ -346,9 +346,9 @@ inline void compute_norm2( template inline void compute_dot_product( - const gko::batch_dense::BatchEntry& x, - const gko::batch_dense::BatchEntry& y, - const gko::batch_dense::BatchEntry& result, + const gko::batch_vector::BatchEntry& x, + const gko::batch_vector::BatchEntry& y, + const gko::batch_vector::BatchEntry& result, const gko::uint32& converged) { for (int c = 0; c < result.num_rhs; c++) { @@ -379,7 +379,7 @@ inline void compute_dot_product( template inline void add_scaled_identity( const ValueType& a, const ValueType& b, - const gko::batch_dense::BatchEntry& mat) + const gko::batch_vector::BatchEntry& mat) { for (int i = 0; i < mat.num_rows; i++) { for (int j = 0; j < mat.num_rhs; j++) { diff --git a/reference/test/matrix/batch_vector_kernels.cpp b/reference/test/matrix/batch_vector_kernels.cpp index 6e1a6c2f8e1..e8aaad8d584 100644 --- a/reference/test/matrix/batch_vector_kernels.cpp +++ b/reference/test/matrix/batch_vector_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include #include @@ -50,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
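// The convergence_* kernels above carry an extra 32-bit mask; assuming, as the
// kernel names and the Convergence* tests in the file below suggest, that a
// set bit marks a right-hand side that has already converged and must not be
// updated, the masking logic can be sketched as follows (masked_add_scaled is
// an illustrative name, not a library function):

#include <cstdint>
#include <vector>

void masked_add_scaled(const double alpha, const std::vector<double>& x,
                       std::vector<double>& y, const int num_rows,
                       const int num_rhs, const std::uint32_t converged)
{
    for (int row = 0; row < num_rows; ++row) {
        for (int col = 0; col < num_rhs; ++col) {
            if (converged & (1u << col)) {
                continue;  // right-hand side `col` already converged: skip it
            }
            y[row * num_rhs + col] += alpha * x[row * num_rhs + col];
        }
    }
}

// The tests below build their masks as 0xfffffffd | (0 - (1u << num_rhs));
// for the small num_rhs values used there (2 and 3) this clears only bit 1,
// i.e. only the second right-hand side is still treated as unconverged.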
#include -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_vector_kernels.hpp" #include "core/test/utils.hpp" @@ -58,15 +58,15 @@ namespace { template -class BatchDense : public ::testing::Test { +class BatchVector : public ::testing::Test { protected: using value_type = T; using size_type = gko::size_type; - using Mtx = gko::matrix::BatchDense; + using Mtx = gko::matrix::BatchVector; using DenseMtx = gko::matrix::Dense; using ComplexMtx = gko::to_complex; using RealMtx = gko::remove_complex; - BatchDense() + BatchVector() : exec(gko::ReferenceExecutor::create()), mtx_0(gko::batch_initialize( {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, @@ -138,10 +138,10 @@ class BatchDense : public ::testing::Test { }; -TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); +TYPED_TEST_SUITE(BatchVector, gko::test::ValueTypes); -TYPED_TEST(BatchDense, AppliesToBatchDense) +TYPED_TEST(BatchVector, AppliesToBatchVector) { using T = typename TestFixture::value_type; this->mtx_1->apply(this->mtx_2.get(), this->mtx_3.get()); @@ -155,7 +155,7 @@ TYPED_TEST(BatchDense, AppliesToBatchDense) } -TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchDense) +TYPED_TEST(BatchVector, AppliesLinearCombinationToBatchVector) { using Mtx = typename TestFixture::Mtx; using DenseMtx = typename TestFixture::DenseMtx; @@ -180,7 +180,7 @@ TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchDense) } -TYPED_TEST(BatchDense, ApplyFailsOnWrongInnerDimension) +TYPED_TEST(BatchVector, ApplyFailsOnWrongInnerDimension) { using Mtx = typename TestFixture::Mtx; auto res = Mtx::create( @@ -191,7 +191,7 @@ TYPED_TEST(BatchDense, ApplyFailsOnWrongInnerDimension) } -TYPED_TEST(BatchDense, ApplyFailsForNonUniformBatches) +TYPED_TEST(BatchVector, ApplyFailsForNonUniformBatches) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -212,7 +212,7 @@ TYPED_TEST(BatchDense, ApplyFailsForNonUniformBatches) } -TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfRows) +TYPED_TEST(BatchVector, ApplyFailsOnWrongNumberOfRows) { using Mtx = typename TestFixture::Mtx; auto res = Mtx::create( @@ -223,7 +223,7 @@ TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfRows) } -TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfCols) +TYPED_TEST(BatchVector, ApplyFailsOnWrongNumberOfCols) { using Mtx = typename TestFixture::Mtx; auto res = Mtx::create( @@ -237,7 +237,7 @@ TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfCols) } -TYPED_TEST(BatchDense, ScalesData) +TYPED_TEST(BatchVector, ScalesData) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -257,7 +257,7 @@ TYPED_TEST(BatchDense, ScalesData) } -TYPED_TEST(BatchDense, ScalesDataWithScalar) +TYPED_TEST(BatchVector, ScalesDataWithScalar) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -275,7 +275,7 @@ TYPED_TEST(BatchDense, ScalesDataWithScalar) } -TYPED_TEST(BatchDense, ScalesDataWithStride) +TYPED_TEST(BatchVector, ScalesDataWithStride) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -294,7 +294,7 @@ TYPED_TEST(BatchDense, ScalesDataWithStride) } -TYPED_TEST(BatchDense, AddsScaled) +TYPED_TEST(BatchVector, AddsScaled) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -313,7 +313,7 @@ TYPED_TEST(BatchDense, AddsScaled) } -TYPED_TEST(BatchDense, AddsScale) +TYPED_TEST(BatchVector, AddsScale) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -337,7 +337,7 @@ 
TYPED_TEST(BatchDense, AddsScale) } -TYPED_TEST(BatchDense, ConvergenceAddScaled) +TYPED_TEST(BatchVector, ConvergenceAddScaled) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -350,7 +350,7 @@ TYPED_TEST(BatchDense, ConvergenceAddScaled) const int num_rhs = 3; const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_dense::convergence_add_scaled( + gko::kernels::reference::batch_vector::convergence_add_scaled( this->exec, alpha.get(), this->mtx_0.get(), this->mtx_1.get(), converged); @@ -378,7 +378,7 @@ TYPED_TEST(BatchDense, ConvergenceAddScaled) } -TYPED_TEST(BatchDense, AddsScaledWithScalar) +TYPED_TEST(BatchVector, AddsScaledWithScalar) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -396,7 +396,7 @@ TYPED_TEST(BatchDense, AddsScaledWithScalar) } -TYPED_TEST(BatchDense, AddsScaleWithScalar) +TYPED_TEST(BatchVector, AddsScaleWithScalar) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -418,7 +418,7 @@ TYPED_TEST(BatchDense, AddsScaleWithScalar) } -TYPED_TEST(BatchDense, AddScaleWithScalarViaApply) +TYPED_TEST(BatchVector, AddScaleWithScalarViaApply) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -441,7 +441,7 @@ TYPED_TEST(BatchDense, AddScaleWithScalarViaApply) } -TYPED_TEST(BatchDense, ConvergenceAddScaledWithScalar) +TYPED_TEST(BatchVector, ConvergenceAddScaledWithScalar) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -453,7 +453,7 @@ TYPED_TEST(BatchDense, ConvergenceAddScaledWithScalar) const int num_rhs = 3; const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_dense::convergence_add_scaled( + gko::kernels::reference::batch_vector::convergence_add_scaled( this->exec, alpha.get(), this->mtx_0.get(), this->mtx_1.get(), converged); @@ -481,7 +481,7 @@ TYPED_TEST(BatchDense, ConvergenceAddScaledWithScalar) } -TYPED_TEST(BatchDense, AddScaledFailsOnWrongSizes) +TYPED_TEST(BatchVector, AddScaledFailsOnWrongSizes) { using Mtx = typename TestFixture::Mtx; auto alpha = @@ -492,7 +492,7 @@ TYPED_TEST(BatchDense, AddScaledFailsOnWrongSizes) } -TYPED_TEST(BatchDense, AddScaleFailsOnWrongSizes) +TYPED_TEST(BatchVector, AddScaleFailsOnWrongSizes) { using Mtx = typename TestFixture::Mtx; auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); @@ -504,7 +504,7 @@ TYPED_TEST(BatchDense, AddScaleFailsOnWrongSizes) } -TYPED_TEST(BatchDense, AddScaleFailsOnWrongScalarSizes) +TYPED_TEST(BatchVector, AddScaleFailsOnWrongScalarSizes) { using Mtx = typename TestFixture::Mtx; auto alpha = gko::batch_initialize( @@ -517,7 +517,7 @@ TYPED_TEST(BatchDense, AddScaleFailsOnWrongScalarSizes) } -TYPED_TEST(BatchDense, ComputesDot) +TYPED_TEST(BatchVector, ComputesDot) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -536,7 +536,7 @@ TYPED_TEST(BatchDense, ComputesDot) } -TYPED_TEST(BatchDense, ConvergenceComputeDot) +TYPED_TEST(BatchVector, ConvergenceComputeDot) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -555,7 +555,7 @@ TYPED_TEST(BatchDense, ConvergenceComputeDot) const int num_rhs = 3; const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_dense::convergence_compute_dot( + gko::kernels::reference::batch_vector::convergence_compute_dot( this->exec, this->mtx_0.get(), this->mtx_1.get(), 
result.get(), converged); @@ -577,12 +577,12 @@ TYPED_TEST(BatchDense, ConvergenceComputeDot) } -TYPED_TEST(BatchDense, ComputesNorm2) +TYPED_TEST(BatchVector, ComputesNorm2) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using T_nc = gko::remove_complex; - using NormVector = gko::matrix::BatchDense; + using NormVector = gko::matrix::BatchVector; auto mtx(gko::batch_initialize( {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, @@ -601,12 +601,12 @@ TYPED_TEST(BatchDense, ComputesNorm2) } -TYPED_TEST(BatchDense, ConvergenceComputeNorm2) +TYPED_TEST(BatchVector, ConvergenceComputeNorm2) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using T_nc = gko::remove_complex; - using NormVector = gko::matrix::BatchDense; + using NormVector = gko::matrix::BatchVector; auto mtx(gko::batch_initialize( {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, @@ -628,7 +628,7 @@ TYPED_TEST(BatchDense, ConvergenceComputeNorm2) const int num_rhs = 2; const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_dense::convergence_compute_norm2( + gko::kernels::reference::batch_vector::convergence_compute_norm2( this->exec, mtx.get(), result.get(), converged); EXPECT_EQ(result->at(0, 0, 0), result_clone->at(0, 0, 0)); @@ -639,7 +639,7 @@ TYPED_TEST(BatchDense, ConvergenceComputeNorm2) } -TYPED_TEST(BatchDense, ComputDotFailsOnWrongInputSize) +TYPED_TEST(BatchVector, ComputDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; auto result = @@ -651,7 +651,7 @@ TYPED_TEST(BatchDense, ComputDotFailsOnWrongInputSize) } -TYPED_TEST(BatchDense, ComputDotFailsOnWrongResultSize) +TYPED_TEST(BatchVector, ComputDotFailsOnWrongResultSize) { using Mtx = typename TestFixture::Mtx; auto result = @@ -667,22 +667,22 @@ TYPED_TEST(BatchDense, ComputDotFailsOnWrongResultSize) } -TYPED_TEST(BatchDense, CopiesData) +TYPED_TEST(BatchVector, CopiesData) { - gko::kernels::reference::batch_dense::copy(this->exec, this->mtx_0.get(), - this->mtx_1.get()); + gko::kernels::reference::batch_vector::copy(this->exec, this->mtx_0.get(), + this->mtx_1.get()); GKO_ASSERT_BATCH_MTX_NEAR(this->mtx_1.get(), this->mtx_0.get(), 0.); } -TYPED_TEST(BatchDense, ConvergenceCopyData) +TYPED_TEST(BatchVector, ConvergenceCopyData) { auto umtx_0 = this->mtx_0->unbatch(); const int num_rhs = 3; const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_dense::convergence_copy( + gko::kernels::reference::batch_vector::convergence_copy( this->exec, this->mtx_0.get(), this->mtx_1.get(), converged); auto mtx_10_clone = gko::clone(this->mtx_10); @@ -706,7 +706,7 @@ TYPED_TEST(BatchDense, ConvergenceCopyData) } -TYPED_TEST(BatchDense, BatchScale) +TYPED_TEST(BatchVector, BatchScale) { using T = typename TestFixture::value_type; using Mtx = typename TestFixture::Mtx; @@ -722,8 +722,8 @@ TYPED_TEST(BatchDense, BatchScale) auto rght(gko::batch_diagonal_initialize( I>{I{-0.5, -2.0}, I{2.0, 0.25}}, this->exec)); - gko::kernels::reference::batch_dense::batch_scale(this->exec, left.get(), - rght.get(), mtx.get()); + gko::kernels::reference::batch_vector::batch_scale(this->exec, left.get(), + rght.get(), mtx.get()); EXPECT_EQ(mtx->at(0, 0, 0), T{-0.5}); EXPECT_EQ(mtx->at(0, 1, 0), T{-2.0}); @@ -741,14 +741,14 @@ TYPED_TEST(BatchDense, BatchScale) } -TYPED_TEST(BatchDense, ConvertsToPrecision) +TYPED_TEST(BatchVector, ConvertsToPrecision) { - 
using BatchDense = typename TestFixture::Mtx; + using BatchVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchDense = typename gko::matrix::BatchDense; - auto tmp = OtherBatchDense::create(this->exec); - auto res = BatchDense::create(this->exec); + using OtherBatchVector = typename gko::matrix::BatchVector; + auto tmp = OtherBatchVector::create(this->exec); + auto res = BatchVector::create(this->exec); // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? gko::remove_complex{0} @@ -764,14 +764,14 @@ TYPED_TEST(BatchDense, ConvertsToPrecision) } -TYPED_TEST(BatchDense, MovesToPrecision) +TYPED_TEST(BatchVector, MovesToPrecision) { - using BatchDense = typename TestFixture::Mtx; + using BatchVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchDense = typename gko::matrix::BatchDense; - auto tmp = OtherBatchDense::create(this->exec); - auto res = BatchDense::create(this->exec); + using OtherBatchVector = typename gko::matrix::BatchVector; + auto tmp = OtherBatchVector::create(this->exec); + auto res = BatchVector::create(this->exec); // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? gko::remove_complex{0} @@ -787,7 +787,7 @@ TYPED_TEST(BatchDense, MovesToPrecision) } -TYPED_TEST(BatchDense, ConvertsToCsr32) +TYPED_TEST(BatchVector, ConvertsToCsr32) { using T = typename TestFixture::value_type; using BatchCsr = typename gko::matrix::BatchCsr; @@ -824,7 +824,7 @@ TYPED_TEST(BatchDense, ConvertsToCsr32) } -TYPED_TEST(BatchDense, MovesToCsr32) +TYPED_TEST(BatchVector, MovesToCsr32) { using T = typename TestFixture::value_type; using BatchCsr = typename gko::matrix::BatchCsr; @@ -861,14 +861,14 @@ TYPED_TEST(BatchDense, MovesToCsr32) } -TYPED_TEST(BatchDense, ConvertsEmptyToPrecision) +TYPED_TEST(BatchVector, ConvertsEmptyToPrecision) { - using BatchDense = typename TestFixture::Mtx; + using BatchVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchDense = typename gko::matrix::BatchDense; - auto empty = OtherBatchDense::create(this->exec); - auto res = BatchDense::create(this->exec); + using OtherBatchVector = typename gko::matrix::BatchVector; + auto empty = OtherBatchVector::create(this->exec); + auto res = BatchVector::create(this->exec); empty->convert_to(res.get()); @@ -876,14 +876,14 @@ TYPED_TEST(BatchDense, ConvertsEmptyToPrecision) } -TYPED_TEST(BatchDense, MovesEmptyToPrecision) +TYPED_TEST(BatchVector, MovesEmptyToPrecision) { - using BatchDense = typename TestFixture::Mtx; + using BatchVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchDense = typename gko::matrix::BatchDense; - auto empty = OtherBatchDense::create(this->exec); - auto res = BatchDense::create(this->exec); + using OtherBatchVector = typename gko::matrix::BatchVector; + auto empty = OtherBatchVector::create(this->exec); + auto res = BatchVector::create(this->exec); empty->move_to(res.get()); @@ -891,12 +891,12 @@ TYPED_TEST(BatchDense, MovesEmptyToPrecision) } -TYPED_TEST(BatchDense, ConvertsEmptyMatrixToCsr) +TYPED_TEST(BatchVector, ConvertsEmptyMatrixToCsr) { - using BatchDense = typename TestFixture::Mtx; + using BatchVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; 
using BatchCsr = typename gko::matrix::BatchCsr; - auto empty = BatchDense::create(this->exec); + auto empty = BatchVector::create(this->exec); auto res = BatchCsr::create(this->exec); empty->convert_to(res.get()); @@ -907,12 +907,12 @@ TYPED_TEST(BatchDense, ConvertsEmptyMatrixToCsr) } -TYPED_TEST(BatchDense, MovesEmptyMatrixToCsr) +TYPED_TEST(BatchVector, MovesEmptyMatrixToCsr) { - using BatchDense = typename TestFixture::Mtx; + using BatchVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using BatchCsr = typename gko::matrix::BatchCsr; - auto empty = BatchDense::create(this->exec); + auto empty = BatchVector::create(this->exec); auto res = BatchCsr::create(this->exec); empty->move_to(res.get()); @@ -923,7 +923,7 @@ TYPED_TEST(BatchDense, MovesEmptyMatrixToCsr) } -TYPED_TEST(BatchDense, ConvertsToBatchDiagonal) +TYPED_TEST(BatchVector, ConvertsToBatchDiagonal) { using BDense = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -946,7 +946,7 @@ TYPED_TEST(BatchDense, ConvertsToBatchDiagonal) } -TYPED_TEST(BatchDense, MovesToBatchDiagonal) +TYPED_TEST(BatchVector, MovesToBatchDiagonal) { using BDense = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -967,13 +967,13 @@ TYPED_TEST(BatchDense, MovesToBatchDiagonal) } -TYPED_TEST(BatchDense, SquareMatrixIsTransposable) +TYPED_TEST(BatchVector, SquareMatrixIsTransposable) { using Mtx = typename TestFixture::Mtx; auto trans = this->mtx_4->transpose(); - auto trans_as_batch_dense = static_cast(trans.get()); + auto trans_as_batch_vector = static_cast(trans.get()); - auto utb = trans_as_batch_dense->unbatch(); + auto utb = trans_as_batch_vector->unbatch(); GKO_ASSERT_MTX_NEAR(utb[0].get(), l({{1.0, 6.0, 6.0}, {1.5, 1.0, 1.0}, {3.0, 5.0, 5.5}}), r::value); @@ -983,13 +983,13 @@ TYPED_TEST(BatchDense, SquareMatrixIsTransposable) } -TYPED_TEST(BatchDense, NonSquareMatrixIsTransposable) +TYPED_TEST(BatchVector, NonSquareMatrixIsTransposable) { using Mtx = typename TestFixture::Mtx; auto trans = this->mtx_5->transpose(); - auto trans_as_batch_dense = static_cast(trans.get()); + auto trans_as_batch_vector = static_cast(trans.get()); - auto utb = trans_as_batch_dense->unbatch(); + auto utb = trans_as_batch_vector->unbatch(); GKO_ASSERT_MTX_NEAR(utb[0].get(), l({{1.0, 6.0, 7.0}, {1.5, 1.0, -4.5}}), r::value); GKO_ASSERT_MTX_NEAR(utb[1].get(), l({{2.0, 1.0, 4.0}, {-2.0, 3.0, 3.0}}), @@ -997,7 +997,7 @@ TYPED_TEST(BatchDense, NonSquareMatrixIsTransposable) } -TYPED_TEST(BatchDense, SquareMatrixAddScaledIdentity) +TYPED_TEST(BatchVector, SquareMatrixAddScaledIdentity) { using T = typename TestFixture::value_type; using Mtx = typename TestFixture::Mtx; diff --git a/test/matrix/batch_vector_kernels.cpp b/test/matrix/batch_vector_kernels.cpp index 5d275dbea5b..150f02a3772 100644 --- a/test/matrix/batch_vector_kernels.cpp +++ b/test/matrix/batch_vector_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_vector_kernels.hpp" #include @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include +#include #include @@ -53,14 +53,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
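// The SquareMatrixAddScaledIdentity test above exercises the
// add_scaled_identity kernel, which appears to update one batch entry as
// M <- a * I + b * M. A minimal sketch of that update on a row-major block;
// add_scaled_identity_entry is an illustrative name, not the library kernel:

#include <vector>

void add_scaled_identity_entry(const double a, const double b,
                               std::vector<double>& mat, const int num_rows,
                               const int num_cols)
{
    for (int i = 0; i < num_rows; ++i) {
        for (int j = 0; j < num_cols; ++j) {
            double& v = mat[i * num_cols + j];
            v = b * v + (i == j ? a : 0.0);
        }
    }
}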
#ifndef GKO_COMPILING_DPCPP -class BatchDense : public CommonTestFixture { +class BatchVector : public CommonTestFixture { protected: using vtype = double; - using Mtx = gko::matrix::BatchDense; - using NormVector = gko::matrix::BatchDense>; - using ComplexMtx = gko::matrix::BatchDense>; + using Mtx = gko::matrix::BatchVector; + using NormVector = gko::matrix::BatchVector>; + using ComplexMtx = gko::matrix::BatchVector>; - BatchDense() : rand_engine(15) {} + BatchVector() : rand_engine(15) {} template std::unique_ptr gen_mtx(const size_t batchsize, int num_rows, @@ -145,7 +145,7 @@ class BatchDense : public CommonTestFixture { }; -TEST_F(BatchDense, SingleVectorAppyIsEquivalentToRef) +TEST_F(BatchVector, SingleVectorAppyIsEquivalentToRef) { set_up_apply_data(1); @@ -156,7 +156,7 @@ TEST_F(BatchDense, SingleVectorAppyIsEquivalentToRef) } -TEST_F(BatchDense, SingleVectorAdvancedAppyIsEquivalentToRef) +TEST_F(BatchVector, SingleVectorAdvancedAppyIsEquivalentToRef) { set_up_apply_data(1); @@ -167,7 +167,7 @@ TEST_F(BatchDense, SingleVectorAdvancedAppyIsEquivalentToRef) } -TEST_F(BatchDense, SingleVectorAddScaledIsEquivalentToRef) +TEST_F(BatchVector, SingleVectorAddScaledIsEquivalentToRef) { set_up_vector_data(1); @@ -178,7 +178,7 @@ TEST_F(BatchDense, SingleVectorAddScaledIsEquivalentToRef) } -TEST_F(BatchDense, SingleVectorAddScaleIsEquivalentToRef) +TEST_F(BatchVector, SingleVectorAddScaleIsEquivalentToRef) { set_up_vector_data(1); @@ -189,7 +189,7 @@ TEST_F(BatchDense, SingleVectorAddScaleIsEquivalentToRef) } -TEST_F(BatchDense, MultipleVectorAddScaledIsEquivalentToRef) +TEST_F(BatchVector, MultipleVectorAddScaledIsEquivalentToRef) { set_up_vector_data(20); @@ -200,7 +200,7 @@ TEST_F(BatchDense, MultipleVectorAddScaledIsEquivalentToRef) } -TEST_F(BatchDense, MultipleVectorAddScaleIsEquivalentToRef) +TEST_F(BatchVector, MultipleVectorAddScaleIsEquivalentToRef) { set_up_vector_data(20); @@ -211,7 +211,7 @@ TEST_F(BatchDense, MultipleVectorAddScaleIsEquivalentToRef) } -TEST_F(BatchDense, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) +TEST_F(BatchVector, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) { set_up_vector_data(20, true); @@ -222,7 +222,7 @@ TEST_F(BatchDense, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) } -TEST_F(BatchDense, MultipleVectorAddScaleWithDifferentScalarsIsEquivalentToRef) +TEST_F(BatchVector, MultipleVectorAddScaleWithDifferentScalarsIsEquivalentToRef) { set_up_vector_data(20, true); @@ -233,7 +233,7 @@ TEST_F(BatchDense, MultipleVectorAddScaleWithDifferentScalarsIsEquivalentToRef) } -TEST_F(BatchDense, SingleVectorScaleIsEquivalentToRef) +TEST_F(BatchVector, SingleVectorScaleIsEquivalentToRef) { set_up_vector_data(1); @@ -244,7 +244,7 @@ TEST_F(BatchDense, SingleVectorScaleIsEquivalentToRef) } -TEST_F(BatchDense, MultipleVectorScaleIsEquivalentToRef) +TEST_F(BatchVector, MultipleVectorScaleIsEquivalentToRef) { set_up_vector_data(20); @@ -255,7 +255,7 @@ TEST_F(BatchDense, MultipleVectorScaleIsEquivalentToRef) } -TEST_F(BatchDense, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) +TEST_F(BatchVector, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) { set_up_vector_data(20, true); @@ -266,7 +266,7 @@ TEST_F(BatchDense, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) } -TEST_F(BatchDense, ComputeNorm2SingleIsEquivalentToRef) +TEST_F(BatchVector, ComputeNorm2SingleIsEquivalentToRef) { set_up_vector_data(1); auto norm_size = @@ -281,7 +281,7 @@ TEST_F(BatchDense, ComputeNorm2SingleIsEquivalentToRef) } 
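// The norm tests around this point compare the batched 2-norm kernel on the
// reference and device executors. Per batch entry the quantity being checked
// is the column-wise norm result[j] = sqrt(sum_i |x(i, j)|^2); a
// self-contained sketch (entry_norm2 is an illustrative name, and a stride
// equal to num_rhs is assumed for brevity):

#include <cmath>
#include <complex>
#include <vector>

std::vector<double> entry_norm2(const std::vector<std::complex<double>>& x,
                                const int num_rows, const int num_rhs)
{
    std::vector<double> result(num_rhs, 0.0);
    for (int j = 0; j < num_rhs; ++j) {
        for (int i = 0; i < num_rows; ++i) {
            result[j] += std::norm(x[i * num_rhs + j]);  // squared magnitude
        }
        result[j] = std::sqrt(result[j]);
    }
    return result;
}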
-TEST_F(BatchDense, ComputeNorm2IsEquivalentToRef) +TEST_F(BatchVector, ComputeNorm2IsEquivalentToRef) { set_up_vector_data(20); auto norm_size = @@ -296,7 +296,7 @@ TEST_F(BatchDense, ComputeNorm2IsEquivalentToRef) } -TEST_F(BatchDense, ComputeDotIsEquivalentToRef) +TEST_F(BatchVector, ComputeDotIsEquivalentToRef) { set_up_vector_data(20); auto dot_size = @@ -311,7 +311,7 @@ TEST_F(BatchDense, ComputeDotIsEquivalentToRef) } -TEST_F(BatchDense, ComputeDotSingleIsEquivalentToRef) +TEST_F(BatchVector, ComputeDotSingleIsEquivalentToRef) { set_up_vector_data(1); auto dot_size = @@ -326,31 +326,31 @@ TEST_F(BatchDense, ComputeDotSingleIsEquivalentToRef) } -TEST_F(BatchDense, CopySingleIsEquivalentToRef) +TEST_F(BatchVector, CopySingleIsEquivalentToRef) { set_up_vector_data(1); - gko::kernels::reference::batch_dense::copy(this->ref, x.get(), y.get()); - gko::kernels::EXEC_NAMESPACE::batch_dense::copy(this->exec, dx.get(), + gko::kernels::reference::batch_vector::copy(this->ref, x.get(), y.get()); + gko::kernels::EXEC_NAMESPACE::batch_vector::copy(this->exec, dx.get(), dy.get()); GKO_ASSERT_BATCH_MTX_NEAR(dy, y, 0.0); } -TEST_F(BatchDense, CopyIsEquivalentToRef) +TEST_F(BatchVector, CopyIsEquivalentToRef) { set_up_vector_data(20); - gko::kernels::reference::batch_dense::copy(this->ref, x.get(), y.get()); - gko::kernels::EXEC_NAMESPACE::batch_dense::copy(this->exec, dx.get(), + gko::kernels::reference::batch_vector::copy(this->ref, x.get(), y.get()); + gko::kernels::EXEC_NAMESPACE::batch_vector::copy(this->exec, dx.get(), dy.get()); GKO_ASSERT_BATCH_MTX_NEAR(dy, y, 0.0); } -TEST_F(BatchDense, BatchScaleIsEquivalentToRef) +TEST_F(BatchVector, BatchScaleIsEquivalentToRef) { using BDiag = gko::matrix::BatchDiagonal; const int num_rhs = 20; @@ -365,16 +365,16 @@ TEST_F(BatchDense, BatchScaleIsEquivalentToRef) auto drght = BDiag::create(this->exec); drght->copy_from(rght.get()); - gko::kernels::reference::batch_dense::batch_scale(this->ref, left.get(), + gko::kernels::reference::batch_vector::batch_scale(this->ref, left.get(), rght.get(), x.get()); - gko::kernels::EXEC_NAMESPACE::batch_dense::batch_scale( + gko::kernels::EXEC_NAMESPACE::batch_vector::batch_scale( this->exec, dleft.get(), drght.get(), dx.get()); GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); } -TEST_F(BatchDense, TransposeIsEquivalentToRef) +TEST_F(BatchVector, TransposeIsEquivalentToRef) { const int nrows = 11; const int ncols = 6; @@ -392,7 +392,7 @@ TEST_F(BatchDense, TransposeIsEquivalentToRef) } -TEST_F(BatchDense, ConjugateTransposeIsEquivalentToRef) +TEST_F(BatchVector, ConjugateTransposeIsEquivalentToRef) { const int nrows = 11; const int ncols = 6; @@ -410,7 +410,7 @@ TEST_F(BatchDense, ConjugateTransposeIsEquivalentToRef) } -TEST_F(BatchDense, AddScaledIdentityNonSquareIsEquivalentToReference) +TEST_F(BatchVector, AddScaledIdentityNonSquareIsEquivalentToReference) { set_up_apply_data(); const gko::size_type batchsize = 10; From 69fd4b0d9e95185fde5f546d9eaba5dde8234d42 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 6 Jul 2023 12:07:36 +0200 Subject: [PATCH 107/583] Remove matrix namespace and use MutliVector --- .../batch_multi_vector_kernels.hpp.inc} | 0 core/CMakeLists.txt | 2 +- .../batch_multi_vector.cpp} | 74 +++---- .../batch_multi_vector_kernels.hpp} | 66 +++--- core/{matrix => base}/batch_struct.hpp | 24 +-- core/device_hooks/common_kernels.inc.cpp | 14 +- core/test/base/CMakeLists.txt | 1 + .../batch_multi_vector.cpp} | 105 +++++----- core/test/matrix/CMakeLists.txt | 1 - cuda/CMakeLists.txt | 2 +- 
.../batch_multi_vector_kernels.cu} | 52 ++--- cuda/{matrix => base}/batch_struct.hpp | 16 +- hip/CMakeLists.txt | 2 +- .../batch_multi_vector_kernels.hip.cpp} | 52 ++--- hip/{matrix => base}/batch_struct.hip.hpp | 16 +- .../batch_multi_vector.hpp} | 190 +++++++++--------- omp/CMakeLists.txt | 2 +- .../batch_multi_vector_kernels.cpp} | 50 ++--- reference/CMakeLists.txt | 2 +- .../batch_multi_vector_kernels.cpp} | 52 ++--- .../batch_multi_vector_kernels.hpp.inc} | 74 +++---- reference/{matrix => base}/batch_struct.hpp | 16 +- .../batch_multi_vector_kernels.cpp} | 170 ++++++++-------- test/base/CMakeLists.txt | 1 + .../batch_multi_vector_kernels.cpp} | 75 ++++--- 25 files changed, 535 insertions(+), 524 deletions(-) rename common/cuda_hip/{matrix/batch_vector_kernels.hpp.inc => base/batch_multi_vector_kernels.hpp.inc} (100%) rename core/{matrix/batch_vector.cpp => base/batch_multi_vector.cpp} (73%) rename core/{matrix/batch_vector_kernels.hpp => base/batch_multi_vector_kernels.hpp} (53%) rename core/{matrix => base}/batch_struct.hpp (85%) rename core/test/{matrix/batch_vector.cpp => base/batch_multi_vector.cpp} (83%) rename cuda/{matrix/batch_vector_kernels.cu => base/batch_multi_vector_kernels.cu} (78%) rename cuda/{matrix => base}/batch_struct.hpp (88%) rename hip/{matrix/batch_vector_kernels.hip.cpp => base/batch_multi_vector_kernels.hip.cpp} (80%) rename hip/{matrix => base}/batch_struct.hip.hpp (88%) rename include/ginkgo/core/{matrix/batch_vector.hpp => base/batch_multi_vector.hpp} (85%) rename omp/{matrix/batch_vector_kernels.cpp => base/batch_multi_vector_kernels.cpp} (77%) rename reference/{matrix/batch_vector_kernels.cpp => base/batch_multi_vector_kernels.cpp} (77%) rename reference/{matrix/batch_vector_kernels.hpp.inc => base/batch_multi_vector_kernels.hpp.inc} (80%) rename reference/{matrix => base}/batch_struct.hpp (88%) rename reference/test/{matrix/batch_vector_kernels.cpp => base/batch_multi_vector_kernels.cpp} (87%) rename test/{matrix/batch_vector_kernels.cpp => base/batch_multi_vector_kernels.cpp} (81%) diff --git a/common/cuda_hip/matrix/batch_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc similarity index 100% rename from common/cuda_hip/matrix/batch_vector_kernels.hpp.inc rename to common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 03d558562dc..d224a7e0f90 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -4,6 +4,7 @@ add_library(ginkgo "") target_sources(ginkgo PRIVATE base/array.cpp + base/batch_multi_vector.cpp base/combination.cpp base/composition.cpp base/dense_cache.cpp @@ -38,7 +39,6 @@ target_sources(ginkgo log/vtune.cpp log/record.cpp log/stream.cpp - matrix/batch_vector.cpp matrix/coo.cpp matrix/csr.cpp matrix/dense.cpp diff --git a/core/matrix/batch_vector.cpp b/core/base/batch_multi_vector.cpp similarity index 73% rename from core/matrix/batch_vector.cpp rename to core/base/batch_multi_vector.cpp index abacd9b1cd8..76639494088 100644 --- a/core/matrix/batch_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include #include @@ -45,28 +45,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "core/matrix/batch_vector_kernels.hpp" +#include "core/base/batch_multi_vector_kernels.hpp" namespace gko { namespace matrix { -namespace batch_vector { +namespace batch_multi_vector { -GKO_REGISTER_OPERATION(scale, batch_vector::scale); -GKO_REGISTER_OPERATION(add_scaled, batch_vector::add_scaled); -GKO_REGISTER_OPERATION(compute_dot, batch_vector::compute_dot); -GKO_REGISTER_OPERATION(compute_norm2, batch_vector::compute_norm2); -GKO_REGISTER_OPERATION(copy, batch_vector::copy); +GKO_REGISTER_OPERATION(scale, batch_multi_vector::scale); +GKO_REGISTER_OPERATION(add_scaled, batch_multi_vector::add_scaled); +GKO_REGISTER_OPERATION(compute_dot, batch_multi_vector::compute_dot); +GKO_REGISTER_OPERATION(compute_norm2, batch_multi_vector::compute_norm2); +GKO_REGISTER_OPERATION(copy, batch_multi_vector::copy); -} // namespace batch_vector +} // namespace batch_multi_vector template -void BatchVector::scale_impl(const BatchLinOp* alpha) +void BatchMultiVector::scale_impl(const BatchLinOp* alpha) { - auto batch_alpha = as>(alpha); + auto batch_alpha = as>(alpha); GKO_ASSERT_BATCH_EQUAL_ROWS( batch_alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); for (size_type b = 0; b < batch_alpha->get_num_batch_entries(); ++b) { @@ -76,16 +76,16 @@ void BatchVector::scale_impl(const BatchLinOp* alpha) } } auto exec = this->get_executor(); - exec->run(batch_vector::make_scale(batch_alpha, this)); + exec->run(batch_multi_vector::make_scale(batch_alpha, this)); } template -void BatchVector::add_scaled_impl(const BatchLinOp* alpha, - const BatchLinOp* b) +void BatchMultiVector::add_scaled_impl(const BatchLinOp* alpha, + const BatchLinOp* b) { - auto batch_alpha = as>(alpha); - auto batch_b = as>(b); + auto batch_alpha = as>(alpha); + auto batch_b = as>(b); GKO_ASSERT_BATCH_EQUAL_ROWS( batch_alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); for (size_type b = 0; b < batch_alpha->get_num_batch_entries(); ++b) { @@ -97,7 +97,7 @@ void BatchVector::add_scaled_impl(const BatchLinOp* alpha, GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, batch_b); auto exec = this->get_executor(); - exec->run(batch_vector::make_add_scaled(batch_alpha, batch_b, this)); + exec->run(batch_multi_vector::make_add_scaled(batch_alpha, batch_b, this)); } @@ -112,35 +112,36 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) template -void BatchVector::compute_dot_impl(const BatchLinOp* b, - BatchLinOp* result) const +void BatchMultiVector::compute_dot_impl(const BatchLinOp* b, + BatchLinOp* result) const { - auto batch_result = as>(result); - auto batch_b = as>(b); + auto batch_result = as>(result); + auto batch_b = as>(b); GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, batch_b); GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_result, get_col_sizes(this->get_size())); auto exec = this->get_executor(); - exec->run(batch_vector::make_compute_dot(this, batch_b, batch_result)); + exec->run( + batch_multi_vector::make_compute_dot(this, batch_b, batch_result)); } template -void BatchVector::compute_norm2_impl(BatchLinOp* result) const +void BatchMultiVector::compute_norm2_impl(BatchLinOp* result) const { - using NormVector = BatchVector>; + using NormVector = BatchMultiVector>; auto batch_result = as(result); GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_result, get_col_sizes(this->get_size())); auto exec = this->get_executor(); - exec->run(batch_vector::make_compute_norm2(as>(this), - batch_result)); + exec->run(batch_multi_vector::make_compute_norm2( + as>(this), batch_result)); } template -void 
BatchVector::convert_to( - BatchVector>* result) const +void BatchMultiVector::convert_to( + BatchMultiVector>* result) const { result->values_ = this->values_; result->stride_ = this->stride_; @@ -150,8 +151,8 @@ void BatchVector::convert_to( template -void BatchVector::move_to( - BatchVector>* result) +void BatchMultiVector::move_to( + BatchMultiVector>* result) { this->convert_to(result); } @@ -189,14 +190,14 @@ inline void read_impl(MatrixType* mtx, const std::vector& data) template -void BatchVector::read(const std::vector& data) +void BatchMultiVector::read(const std::vector& data) { read_impl(this, data); } template -void BatchVector::read(const std::vector& data) +void BatchMultiVector::read(const std::vector& data) { read_impl(this, data); } @@ -231,21 +232,22 @@ inline void write_impl(const MatrixType* mtx, std::vector& data) template -void BatchVector::write(std::vector& data) const +void BatchMultiVector::write(std::vector& data) const { write_impl(this, data); } template -void BatchVector::write(std::vector& data) const +void BatchMultiVector::write(std::vector& data) const { write_impl(this, data); } -#define GKO_DECLARE_BATCH_VECTOR_MATRIX(_type) class BatchVector<_type> -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_MATRIX); +#define GKO_DECLARE_BATCH_MULTI_VECTOR_MATRIX(_type) \ + class BatchMultiVector<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_MATRIX); } // namespace matrix diff --git a/core/matrix/batch_vector_kernels.hpp b/core/base/batch_multi_vector_kernels.hpp similarity index 53% rename from core/matrix/batch_vector_kernels.hpp rename to core/base/batch_multi_vector_kernels.hpp index 6ddfc9e2676..34da4ce4c2f 100644 --- a/core/matrix/batch_vector_kernels.hpp +++ b/core/base/batch_multi_vector_kernels.hpp @@ -30,11 +30,11 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#ifndef GKO_CORE_MATRIX_BATCH_VECTOR_KERNELS_HPP_ -#define GKO_CORE_MATRIX_BATCH_VECTOR_KERNELS_HPP_ +#ifndef GKO_CORE_MATRIX_BATCH_MULTI_VECTOR_KERNELS_HPP_ +#define GKO_CORE_MATRIX_BATCH_MULTI_VECTOR_KERNELS_HPP_ -#include +#include #include @@ -46,48 +46,48 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL(_type) \ +#define GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL(_type) \ void scale(std::shared_ptr exec, \ - const matrix::BatchVector<_type>* alpha, \ - matrix::BatchVector<_type>* x) + const BatchMultiVector<_type>* alpha, \ + BatchMultiVector<_type>* x) -#define GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL(_type) \ +#define GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL(_type) \ void add_scaled(std::shared_ptr exec, \ - const matrix::BatchVector<_type>* alpha, \ - const matrix::BatchVector<_type>* x, \ - matrix::BatchVector<_type>* y) + const BatchMultiVector<_type>* alpha, \ + const BatchMultiVector<_type>* x, \ + BatchMultiVector<_type>* y) -#define GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL(_type) \ +#define GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL(_type) \ void compute_dot(std::shared_ptr exec, \ - const matrix::BatchVector<_type>* x, \ - const matrix::BatchVector<_type>* y, \ - matrix::BatchVector<_type>* result) + const BatchMultiVector<_type>* x, \ + const BatchMultiVector<_type>* y, \ + BatchMultiVector<_type>* result) -#define GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL(_type) \ +#define GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL(_type) \ void compute_norm2(std::shared_ptr exec, \ - const matrix::BatchVector<_type>* x, \ - matrix::BatchVector>* result) + const BatchMultiVector<_type>* x, \ + BatchMultiVector>* result) -#define GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL(_type) \ +#define GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL(_type) \ void copy(std::shared_ptr exec, \ - const matrix::BatchVector<_type>* x, \ - matrix::BatchVector<_type>* result) + const BatchMultiVector<_type>* x, \ + BatchMultiVector<_type>* result) -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL(ValueType) +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL(ValueType) -GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_vector, +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_multi_vector, GKO_DECLARE_ALL_AS_TEMPLATES); @@ -98,4 +98,4 @@ GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_vector, } // namespace gko -#endif // GKO_CORE_MATRIX_BATCH_VECTOR_KERNELS_HPP_ +#endif // GKO_CORE_MATRIX_BATCH_MULTI_VECTOR_KERNELS_HPP_ diff --git a/core/matrix/batch_struct.hpp b/core/base/batch_struct.hpp similarity index 85% rename from core/matrix/batch_struct.hpp rename to core/base/batch_struct.hpp index 01092f0e4d0..68fcdd9c8a0 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -30,8 +30,8 @@ THEORY OF 
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ -#define GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ +#ifndef GKO_CORE_BASE_BATCH_STRUCT_HPP_ +#define GKO_CORE_BASE_BATCH_STRUCT_HPP_ #include @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -namespace batch_vector { +namespace batch_multi_vector { /** @@ -77,23 +77,23 @@ struct UniformBatch { }; -} // namespace batch_vector +} // namespace batch_multi_vector namespace batch { template -GKO_ATTRIBUTES GKO_INLINE gko::batch_vector::BatchEntry -to_const(const gko::batch_vector::BatchEntry& b) +GKO_ATTRIBUTES GKO_INLINE gko::batch_multi_vector::BatchEntry +to_const(const gko::batch_multi_vector::BatchEntry& b) { return {b.values, b.stride, b.num_rows, b.num_rhs}; } template -GKO_ATTRIBUTES GKO_INLINE gko::batch_vector::UniformBatch -to_const(const gko::batch_vector::UniformBatch& ub) +GKO_ATTRIBUTES GKO_INLINE gko::batch_multi_vector::UniformBatch +to_const(const gko::batch_multi_vector::UniformBatch& ub) { return {ub.values, ub.num_batch, ub.stride, ub.num_rows, ub.num_rhs}; } @@ -109,8 +109,8 @@ to_const(const gko::batch_vector::UniformBatch& ub) * @param batch_idx The position of the desired object in the batch */ template -GKO_ATTRIBUTES GKO_INLINE batch_vector::BatchEntry batch_entry( - const batch_vector::UniformBatch& batch, +GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::BatchEntry batch_entry( + const batch_multi_vector::UniformBatch& batch, const size_type batch_idx) { return {batch.values + batch_idx * batch.stride * batch.num_rows, @@ -118,7 +118,7 @@ GKO_ATTRIBUTES GKO_INLINE batch_vector::BatchEntry batch_entry( } template -GKO_ATTRIBUTES GKO_INLINE batch_vector::BatchEntry batch_entry( +GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::BatchEntry batch_entry( ValueType* const batch_values, const size_type stride, const int num_rows, const int num_rhs, const size_type batch_idx) { @@ -140,4 +140,4 @@ GKO_ATTRIBUTES GKO_INLINE ValueType* batch_entry_ptr( } // namespace gko -#endif // GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ +#endif // GKO_CORE_BASE_BATCH_STRUCT_HPP_ diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index a5aa43100a3..3fe1372558b 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -272,17 +272,17 @@ GKO_STUB_VALUE_AND_LOCAL_GLOBAL_INDEX_TYPE(GKO_DECLARE_BUILD_LOCAL_NONLOCAL); } // namespace distributed_matrix -namespace batch_vector { +namespace batch_multi_vector { -GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL); -GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL); -GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL); -GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL); -GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); -} // namespace batch_vector +} // namespace batch_multi_vector namespace dense { diff --git a/core/test/base/CMakeLists.txt b/core/test/base/CMakeLists.txt index 
aa79ca3ed92..f51862e8244 100644 --- a/core/test/base/CMakeLists.txt +++ b/core/test/base/CMakeLists.txt @@ -1,6 +1,7 @@ ginkgo_create_test(abstract_factory) ginkgo_create_test(allocator) ginkgo_create_test(array) +ginkgo_create_test(batch_multi_vector) ginkgo_create_test(dense_cache) ginkgo_create_test(combination) ginkgo_create_test(composition) diff --git a/core/test/matrix/batch_vector.cpp b/core/test/base/batch_multi_vector.cpp similarity index 83% rename from core/test/matrix/batch_vector.cpp rename to core/test/base/batch_multi_vector.cpp index 4735d5eead2..e43be1e7b86 100644 --- a/core/test/matrix/batch_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include #include @@ -45,14 +45,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template -class BatchVector : public ::testing::Test { +class BatchMultiVector : public ::testing::Test { protected: using value_type = T; using DenseMtx = gko::matrix::Dense; using size_type = gko::size_type; - BatchVector() + BatchMultiVector() : exec(gko::ReferenceExecutor::create()), - mtx(gko::batch_initialize>( + mtx(gko::batch_initialize>( std::vector{4, 3}, {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, @@ -61,7 +61,7 @@ class BatchVector : public ::testing::Test { static void assert_equal_to_original_mtx( - gko::matrix::BatchVector* m) + gko::BatchMultiVector* m) { ASSERT_EQ(m->get_num_batch_entries(), 2); ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); @@ -85,37 +85,37 @@ class BatchVector : public ::testing::Test { ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); } - static void assert_empty(gko::matrix::BatchVector* m) + static void assert_empty(gko::BatchMultiVector* m) { ASSERT_EQ(m->get_num_batch_entries(), 0); ASSERT_EQ(m->get_num_stored_elements(), 0); } std::shared_ptr exec; - std::unique_ptr> mtx; + std::unique_ptr> mtx; }; -TYPED_TEST_SUITE(BatchVector, gko::test::ValueTypes); +TYPED_TEST_SUITE(BatchMultiVector, gko::test::ValueTypes); -TYPED_TEST(BatchVector, CanBeEmpty) +TYPED_TEST(BatchMultiVector, CanBeEmpty) { - auto empty = gko::matrix::BatchVector::create(this->exec); + auto empty = gko::BatchMultiVector::create(this->exec); this->assert_empty(empty.get()); } -TYPED_TEST(BatchVector, ReturnsNullValuesArrayWhenEmpty) +TYPED_TEST(BatchMultiVector, ReturnsNullValuesArrayWhenEmpty) { - auto empty = gko::matrix::BatchVector::create(this->exec); + auto empty = gko::BatchMultiVector::create(this->exec); ASSERT_EQ(empty->get_const_values(), nullptr); } -TYPED_TEST(BatchVector, CanBeConstructedWithSize) +TYPED_TEST(BatchMultiVector, CanBeConstructedWithSize) { using size_type = gko::size_type; - auto m = gko::matrix::BatchVector::create( + auto m = gko::BatchMultiVector::create( this->exec, std::vector>{gko::dim<2>{2, 4}, gko::dim<2>{2, 3}}); @@ -130,10 +130,10 @@ TYPED_TEST(BatchVector, CanBeConstructedWithSize) } -TYPED_TEST(BatchVector, CanBeConstructedWithSizeAndStride) +TYPED_TEST(BatchMultiVector, CanBeConstructedWithSizeAndStride) { using size_type = gko::size_type; - auto m = gko::matrix::BatchVector::create( + auto m = gko::BatchMultiVector::create( this->exec, std::vector>{gko::dim<2>{2, 3}}, std::vector{4}); @@ -143,7 +143,7 @@ TYPED_TEST(BatchVector, CanBeConstructedWithSizeAndStride) } -TYPED_TEST(BatchVector, CanBeConstructedFromExistingData) 
+TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingData) { using value_type = typename TestFixture::value_type; using size_type = gko::size_type; @@ -155,7 +155,7 @@ TYPED_TEST(BatchVector, CanBeConstructedFromExistingData) 5.0, 6.0, -3.0}; // clang-format on - auto m = gko::matrix::BatchVector::create( + auto m = gko::BatchMultiVector::create( this->exec, std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, gko::array::view(this->exec, 12, data), @@ -169,7 +169,7 @@ TYPED_TEST(BatchVector, CanBeConstructedFromExistingData) } -TYPED_TEST(BatchVector, CanBeConstructedFromExistingConstData) +TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingConstData) { using value_type = typename TestFixture::value_type; using size_type = gko::size_type; @@ -181,7 +181,7 @@ TYPED_TEST(BatchVector, CanBeConstructedFromExistingConstData) 5.0, 6.0, -3.0}; // clang-format on - auto m = gko::matrix::BatchVector::create_const( + auto m = gko::BatchMultiVector::create_const( this->exec, std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, gko::array::const_view(this->exec, 12, data), @@ -195,7 +195,7 @@ TYPED_TEST(BatchVector, CanBeConstructedFromExistingConstData) } -TYPED_TEST(BatchVector, CanBeConstructedFromBatchVectorMatrices) +TYPED_TEST(BatchMultiVector, CanBeConstructedFromBatchMultiVectorMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -205,19 +205,18 @@ TYPED_TEST(BatchVector, CanBeConstructedFromBatchVectorMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::matrix::BatchVector::create( + auto m = gko::BatchMultiVector::create( this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::matrix::BatchVector::create( + auto m_ref = gko::BatchMultiVector::create( this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), mat1.get(), mat2.get()}); - auto m2 = - gko::matrix::BatchVector::create(this->exec, 3, m.get()); + auto m2 = gko::BatchMultiVector::create(this->exec, 3, m.get()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } -TYPED_TEST(BatchVector, CanBeConstructedFromDenseMatricesByDuplication) +TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatricesByDuplication) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -227,16 +226,16 @@ TYPED_TEST(BatchVector, CanBeConstructedFromDenseMatricesByDuplication) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::matrix::BatchVector::create( + auto bat_m = gko::BatchMultiVector::create( this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); auto m = - gko::matrix::BatchVector::create(this->exec, 3, mat1.get()); + gko::BatchMultiVector::create(this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } -TYPED_TEST(BatchVector, CanBeConstructedFromDenseMatrices) +TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -246,14 +245,14 @@ TYPED_TEST(BatchVector, CanBeConstructedFromDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::matrix::BatchVector::create( + auto m = gko::BatchMultiVector::create( this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); } -TYPED_TEST(BatchVector, CanBeUnbatchedIntoDenseMatrices) +TYPED_TEST(BatchMultiVector, 
CanBeUnbatchedIntoDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -271,16 +270,16 @@ TYPED_TEST(BatchVector, CanBeUnbatchedIntoDenseMatrices) } -TYPED_TEST(BatchVector, KnowsItsSizeAndValues) +TYPED_TEST(BatchMultiVector, KnowsItsSizeAndValues) { this->assert_equal_to_original_mtx(this->mtx.get()); } -TYPED_TEST(BatchVector, CanBeListConstructed) +TYPED_TEST(BatchMultiVector, CanBeListConstructed) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch_initialize>( {{1.0, 2.0}, {1.0, 3.0}}, this->exec); ASSERT_EQ(m->get_num_batch_entries(), 2); @@ -294,10 +293,10 @@ TYPED_TEST(BatchVector, CanBeListConstructed) } -TYPED_TEST(BatchVector, CanBeListConstructedWithstride) +TYPED_TEST(BatchMultiVector, CanBeListConstructedWithstride) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch_initialize>( std::vector{2}, {{1.0, 2.0}}, this->exec); ASSERT_EQ(m->get_num_batch_entries(), 1); ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); @@ -307,10 +306,10 @@ TYPED_TEST(BatchVector, CanBeListConstructedWithstride) } -TYPED_TEST(BatchVector, CanBeListConstructedByCopies) +TYPED_TEST(BatchMultiVector, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch_initialize>( 2, I({1.0, 2.0}), this->exec); ASSERT_EQ(m->get_num_batch_entries(), 2); ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); @@ -323,11 +322,11 @@ TYPED_TEST(BatchVector, CanBeListConstructedByCopies) } -TYPED_TEST(BatchVector, CanBeDoubleListConstructed) +TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; using T = value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch_initialize>( {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, this->exec); @@ -352,11 +351,11 @@ TYPED_TEST(BatchVector, CanBeDoubleListConstructed) } -TYPED_TEST(BatchVector, CanBeDoubleListConstructedWithstride) +TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructedWithstride) { using value_type = typename TestFixture::value_type; using T = value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch_initialize>( {4, 3}, {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, @@ -382,9 +381,9 @@ TYPED_TEST(BatchVector, CanBeDoubleListConstructedWithstride) } -TYPED_TEST(BatchVector, CanBeCopied) +TYPED_TEST(BatchMultiVector, CanBeCopied) { - auto mtx_copy = gko::matrix::BatchVector::create(this->exec); + auto mtx_copy = gko::BatchMultiVector::create(this->exec); mtx_copy->copy_from(this->mtx.get()); this->assert_equal_to_original_mtx(this->mtx.get()); this->mtx->at(0, 0, 0) = 7; @@ -393,15 +392,15 @@ TYPED_TEST(BatchVector, CanBeCopied) } -TYPED_TEST(BatchVector, CanBeMoved) +TYPED_TEST(BatchMultiVector, CanBeMoved) { - auto mtx_copy = gko::matrix::BatchVector::create(this->exec); + auto mtx_copy = gko::BatchMultiVector::create(this->exec); mtx_copy->copy_from(std::move(this->mtx)); this->assert_equal_to_original_mtx(mtx_copy.get()); } -TYPED_TEST(BatchVector, CanBeCloned) +TYPED_TEST(BatchMultiVector, CanBeCloned) { auto mtx_clone = this->mtx->clone(); this->assert_equal_to_original_mtx( @@ -409,17 +408,17 @@ TYPED_TEST(BatchVector, CanBeCloned) } -TYPED_TEST(BatchVector, CanBeCleared) +TYPED_TEST(BatchMultiVector, 
CanBeCleared) { this->mtx->clear(); this->assert_empty(this->mtx.get()); } -TYPED_TEST(BatchVector, CanBeReadFromMatrixData) +TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; - auto m = gko::matrix::BatchVector::create(this->exec); + auto m = gko::BatchMultiVector::create(this->exec); // clang-format off m->read({gko::matrix_data{{2, 3}, {{0, 0, 1.0}, @@ -453,7 +452,7 @@ TYPED_TEST(BatchVector, CanBeReadFromMatrixData) } -TYPED_TEST(BatchVector, GeneratesCorrectMatrixData) +TYPED_TEST(BatchMultiVector, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; using tpl = typename gko::matrix_data::nonzero_type; @@ -480,10 +479,10 @@ TYPED_TEST(BatchVector, GeneratesCorrectMatrixData) } -TYPED_TEST(BatchVector, CanBeReadFromMatrixAssemblyData) +TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixAssemblyData) { using value_type = typename TestFixture::value_type; - auto m = gko::matrix::BatchVector::create(this->exec); + auto m = gko::BatchMultiVector::create(this->exec); gko::matrix_assembly_data data1(gko::dim<2>{2, 3}); data1.set_value(0, 0, 1.0); data1.set_value(0, 1, 3.0); diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index fbfe5f95e3f..433361a054f 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -1,4 +1,3 @@ -ginkgo_create_test(batch_vector) ginkgo_create_test(coo) ginkgo_create_test(coo_builder) ginkgo_create_test(csr) diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index d630fb9a92a..dccc9e91401 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -6,6 +6,7 @@ add_instantiation_files(. matrix/fbcsr_kernels.instantiate.cu FBCSR_INSTANTIATE) list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) target_sources(ginkgo_cuda PRIVATE + base/batch_multi_vector_kernels.cu base/device.cpp base/device_matrix_data_kernels.cu base/exception.cpp @@ -35,7 +36,6 @@ target_sources(ginkgo_cuda factorization/par_ilut_select_kernel.cu factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu - matrix/batch_vector_kernels.cu matrix/coo_kernels.cu ${CSR_INSTANTIATE} matrix/dense_kernels.cu diff --git a/cuda/matrix/batch_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu similarity index 78% rename from cuda/matrix/batch_vector_kernels.cu rename to cuda/base/batch_multi_vector_kernels.cu index 9ceca9e2b3a..039ab94b767 100644 --- a/cuda/matrix/batch_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -30,14 +30,15 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_vector_kernels.hpp" +#include "core/base/batch_multi_vector_kernels.hpp" #include #include -#include "core/matrix/batch_struct.hpp" +#include "core/base/batch_struct.hpp" +#include "cuda/base/batch_struct.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" #include "cuda/base/pointer_mode_guard.hpp" @@ -45,31 +46,30 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/components/reduction.cuh" #include "cuda/components/thread_ids.cuh" #include "cuda/components/uninitialized_array.hpp" -#include "cuda/matrix/batch_struct.hpp" namespace gko { namespace kernels { namespace cuda { /** - * @brief The BatchVector matrix format namespace. 
+ * @brief The BatchMultiVector matrix format namespace. * - * @ingroup batch_vector + * @ingroup batch_multi_vector */ -namespace batch_vector { +namespace batch_multi_vector { constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; -#include "common/cuda_hip/matrix/batch_vector_kernels.hpp.inc" +#include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" template void scale(std::shared_ptr exec, - const matrix::BatchVector* const alpha, - matrix::BatchVector* const x) + const BatchMultiVector* const alpha, + BatchMultiVector* const x) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto alpha_ub = get_batch_struct(alpha); @@ -77,14 +77,15 @@ void scale(std::shared_ptr exec, scale<<>>(alpha_ub, x_ub); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); template void add_scaled(std::shared_ptr exec, - const matrix::BatchVector* const alpha, - const matrix::BatchVector* const x, - matrix::BatchVector* const y) + const BatchMultiVector* const alpha, + const BatchMultiVector* const x, + BatchMultiVector* const y) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const size_type nrhs = x->get_size().at(0)[1]; @@ -102,14 +103,15 @@ void add_scaled(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::BatchVector* x, - const matrix::BatchVector* y, - matrix::BatchVector* result) + const BatchMultiVector* x, + const BatchMultiVector* y, + BatchMultiVector* result) { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_size().at()[1]; @@ -129,13 +131,13 @@ void compute_dot(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL); + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::BatchVector* const x, - matrix::BatchVector>* const result) + const BatchMultiVector* const x, + BatchMultiVector>* const result) { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_size().at()[1]; @@ -152,13 +154,13 @@ void compute_norm2(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL); + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); template void copy(std::shared_ptr exec, - const matrix::BatchVector* x, - matrix::BatchVector* result) + const BatchMultiVector* x, + BatchMultiVector* result) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto result_ub = get_batch_struct(result); @@ -166,10 +168,10 @@ void copy(std::shared_ptr exec, copy<<>>(x_ub, result_ub); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); -} // namespace batch_vector +} // namespace batch_multi_vector } // namespace cuda } // namespace kernels } // namespace gko diff --git a/cuda/matrix/batch_struct.hpp b/cuda/base/batch_struct.hpp similarity index 88% rename from cuda/matrix/batch_struct.hpp rename to cuda/base/batch_struct.hpp index 104286f66b9..0bd9bd6dc40 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -34,11 +34,11 @@ OF THIS SOFTWARE, EVEN 
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ -#include "core/matrix/batch_struct.hpp" +#include "core/base/batch_struct.hpp" +#include #include -#include #include "cuda/base/config.hpp" @@ -64,8 +64,8 @@ namespace cuda { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_vector::UniformBatch> -get_batch_struct(const matrix::BatchVector* const op) +inline gko::batch_multi_vector::UniformBatch> +get_batch_struct(const BatchMultiVector* const op) { return { as_cuda_type(op->get_const_values()), @@ -80,8 +80,8 @@ get_batch_struct(const matrix::BatchVector* const op) * Generates a uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_vector::UniformBatch> get_batch_struct( - matrix::BatchVector* const op) +inline gko::batch_multi_vector::UniformBatch> +get_batch_struct(BatchMultiVector* const op) { return { as_cuda_type(op->get_values()), @@ -98,8 +98,8 @@ inline gko::batch_vector::UniformBatch> get_batch_struct( * that may be null. */ template -inline gko::batch_vector::UniformBatch> -maybe_null_batch_struct(const matrix::BatchVector* const op) +inline gko::batch_multi_vector::UniformBatch> +maybe_null_batch_struct(const BatchMultiVector* const op) { if (op) { return {as_cuda_type(op->get_const_values()), diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index fea0dec5c8c..1573169527d 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -4,6 +4,7 @@ add_instantiation_files(. matrix/fbcsr_kernels.instantiate.hip.cpp FBCSR_INSTANT # we don't split up the dense kernels into distinct compilations list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) set(GINKGO_HIP_SOURCES + base/batch_multi_vector_kernels.hip.cpp base/device.hip.cpp base/device_matrix_data_kernels.hip.cpp base/exception.hip.cpp @@ -33,7 +34,6 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_select_kernel.hip.cpp factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp - matrix/batch_vector_kernels.hip.cpp matrix/coo_kernels.hip.cpp ${CSR_INSTANTIATE} matrix/dense_kernels.hip.cpp diff --git a/hip/matrix/batch_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp similarity index 80% rename from hip/matrix/batch_vector_kernels.hip.cpp rename to hip/base/batch_multi_vector_kernels.hip.cpp index 97bbaf50440..01a443558e9 100644 --- a/hip/matrix/batch_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_vector_kernels.hpp" +#include "core/base/batch_multi_vector_kernels.hpp" #include @@ -40,7 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/batch_struct.hpp" +#include "core/base/batch_struct.hpp" +#include "hip/base/batch_struct.hip.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipblas_bindings.hip.hpp" #include "hip/base/pointer_mode_guard.hip.hpp" @@ -48,31 +49,30 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "hip/components/reduction.hip.hpp" #include "hip/components/thread_ids.hip.hpp" #include "hip/components/uninitialized_array.hip.hpp" -#include "hip/matrix/batch_struct.hip.hpp" namespace gko { namespace kernels { namespace hip { /** - * @brief The BatchVector matrix format namespace. + * @brief The BatchMultiVector matrix format namespace. * - * @ingroup batch_vector + * @ingroup batch_multi_vector */ -namespace batch_vector { +namespace batch_multi_vector { constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; -#include "common/cuda_hip/matrix/batch_vector_kernels.hpp.inc" +#include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" template void scale(std::shared_ptr exec, - const matrix::BatchVector* const alpha, - matrix::BatchVector* const x) + const BatchMultiVector* const alpha, + BatchMultiVector* const x) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto alpha_ub = get_batch_struct(alpha); @@ -81,14 +81,15 @@ void scale(std::shared_ptr exec, alpha_ub, x_ub); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); template void add_scaled(std::shared_ptr exec, - const matrix::BatchVector* const alpha, - const matrix::BatchVector* const x, - matrix::BatchVector* const y) + const BatchMultiVector* const alpha, + const BatchMultiVector* const x, + BatchMultiVector* const y) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const size_type nrhs = x->get_size().at(0)[1]; @@ -109,14 +110,15 @@ void add_scaled(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::BatchVector* x, - const matrix::BatchVector* y, - matrix::BatchVector* result) + const BatchMultiVector* x, + const BatchMultiVector* y, + BatchMultiVector* result) { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_size().at()[1]; @@ -138,13 +140,13 @@ void compute_dot(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL); + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::BatchVector* const x, - matrix::BatchVector>* const result) + const BatchMultiVector* const x, + BatchMultiVector>* const result) { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_size().at()[1]; @@ -163,13 +165,13 @@ void compute_norm2(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL); + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); template void copy(std::shared_ptr exec, - const matrix::BatchVector* x, - matrix::BatchVector* result) + const BatchMultiVector* x, + BatchMultiVector* result) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto result_ub = get_batch_struct(result); @@ -178,10 +180,10 @@ void copy(std::shared_ptr exec, x_ub, result_ub); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); -} // namespace batch_vector +} // namespace batch_multi_vector } // namespace hip } // namespace kernels } // namespace gko diff --git a/hip/matrix/batch_struct.hip.hpp 
b/hip/base/batch_struct.hip.hpp similarity index 88% rename from hip/matrix/batch_struct.hip.hpp rename to hip/base/batch_struct.hip.hpp index e2648ba4a25..214039f060b 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -34,11 +34,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ -#include "core/matrix/batch_struct.hpp" +#include "core/base/batch_struct.hpp" +#include #include -#include #include "hip/base/config.hip.hpp" @@ -64,8 +64,8 @@ namespace hip { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_vector::UniformBatch> -get_batch_struct(const matrix::BatchVector* const op) +inline gko::batch_multi_vector::UniformBatch> +get_batch_struct(const BatchMultiVector* const op) { return { as_hip_type(op->get_const_values()), @@ -80,8 +80,8 @@ get_batch_struct(const matrix::BatchVector* const op) * Generates a uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_vector::UniformBatch> get_batch_struct( - matrix::BatchVector* const op) +inline gko::batch_multi_vector::UniformBatch> +get_batch_struct(BatchMultiVector* const op) { return { as_hip_type(op->get_values()), @@ -98,8 +98,8 @@ inline gko::batch_vector::UniformBatch> get_batch_struct( * that may be null. */ template -inline gko::batch_vector::UniformBatch> -maybe_null_batch_struct(const matrix::BatchVector* const op) +inline gko::batch_multi_vector::UniformBatch> +maybe_null_batch_struct(const BatchMultiVector* const op) { if (op) { return {as_hip_type(op->get_const_values()), diff --git a/include/ginkgo/core/matrix/batch_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp similarity index 85% rename from include/ginkgo/core/matrix/batch_vector.hpp rename to include/ginkgo/core/base/batch_multi_vector.hpp index aee16bbc27b..a4dafd75faa 100644 --- a/include/ginkgo/core/matrix/batch_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_VECTOR_HPP_ -#define GKO_PUBLIC_CORE_MATRIX_BATCH_VECTOR_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_BATCH_MULTI_VECTOR_HPP_ +#define GKO_PUBLIC_CORE_BASE_BATCH_MULTI_VECTOR_HPP_ #include @@ -48,12 +48,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -namespace matrix { - /** - * BatchVector is a batch matrix format which explicitly stores all values of - * the vector in each of the batches. + * BatchMultiVector is a batch matrix format which explicitly stores all values + * of the vector in each of the batches. * * The values in each of the batches are stored in row-major format (values * belonging to the same row appear consecutive in the memory). Optionally, rows @@ -63,21 +61,21 @@ namespace matrix { * * @note While this format is not very useful for storing sparse matrices, it * is often suitable to store vectors, and sets of vectors. 
- * @ingroup batch_vector + * @ingroup batch_multi_vector * @ingroup mat_formats * @ingroup BatchLinOp */ template -class BatchVector - : public EnableAbstractPolymorphicObject>, - public EnableCreateMethod>, - public ConvertibleTo>>, +class BatchMultiVector + : public EnableAbstractPolymorphicObject>, + public EnableCreateMethod>, + public ConvertibleTo>>, public BatchReadableFromMatrixData, public BatchReadableFromMatrixData, public BatchWritableToMatrixData, public BatchWritableToMatrixData { - friend class EnableCreateMethod; - friend class BatchVector>; + friend class EnableCreateMethod; + friend class BatchMultiVector>; public: using BatchReadableFromMatrixData::read; @@ -88,19 +86,19 @@ class BatchVector using unbatch_type = Dense; using mat_data = gko::matrix_data; using mat_data32 = gko::matrix_data; - using absolute_type = remove_complex; - using complex_type = to_complex; + using absolute_type = remove_complex; + using complex_type = to_complex; using row_major_range = gko::range>; /** - * Creates a BatchVector matrix with the configuration of another - * BatchVector matrix. + * Creates a BatchMultiVector matrix with the configuration of another + * BatchMultiVector matrix. * * @param other The other matrix whose configuration needs to copied. */ - static std::unique_ptr create_with_config_of( - const BatchVector* other) + static std::unique_ptr create_with_config_of( + const BatchMultiVector* other) { // De-referencing `other` before calling the functions (instead of // using operator `->`) is currently required to be compatible with @@ -109,12 +107,12 @@ class BatchVector return (*other).create_with_same_config(); } - friend class BatchVector>; + friend class BatchMultiVector>; void convert_to( - BatchVector>* result) const override; + BatchMultiVector>* result) const override; - void move_to(BatchVector>* result) override; + void move_to(BatchMultiVector>* result) override; void read(const std::vector& data) override; @@ -235,7 +233,7 @@ class BatchVector } /** - * @copydoc BatchVector::at(size_type, size_type, size_type) + * @copydoc BatchMultiVector::at(size_type, size_type, size_type) */ value_type at(size_type batch, size_type row, size_type col) const noexcept { @@ -264,7 +262,7 @@ class BatchVector } /** - * @copydoc BatchVector::at(size_type, size_type, size_type) + * @copydoc BatchMultiVector::at(size_type, size_type, size_type) */ ValueType at(size_type batch, size_type idx) const noexcept { @@ -274,11 +272,11 @@ class BatchVector /** * Scales the vector with a scalar (aka: BLAS scal). * - * @param alpha If alpha is 1x1 BatchVector matrix, the entire matrix (all - * batches) is scaled by alpha. If it is a BatchVector row vector of values, - * then i-th column of the vector is scaled with the i-th element of alpha - * (the number of columns of alpha has to match the number of columns of the - * matrix). + * @param alpha If alpha is 1x1 BatchMultiVector matrix, the entire matrix + * (all batches) is scaled by alpha. If it is a BatchMultiVector row vector + * of values, then i-th column of the vector is scaled with the i-th element + * of alpha (the number of columns of alpha has to match the number of + * columns of the matrix). */ void scale(const BatchLinOp* alpha) { @@ -289,10 +287,11 @@ class BatchVector /** * Adds `b` scaled by `alpha` to the vector (aka: BLAS axpy). * - * @param alpha If alpha is 1x1 BatchVector matrix, the entire matrix is - * scaled by alpha. 
If it is a BatchVector row vector of values, then i-th - * column of the vector is scaled with the i-th element of alpha (the number - * of columns of alpha has to match the number of columns of the vector). + * @param alpha If alpha is 1x1 BatchMultiVector matrix, the entire matrix + * is scaled by alpha. If it is a BatchMultiVector row vector of values, + * then i-th column of the vector is scaled with the i-th element of alpha + * (the number of columns of alpha has to match the number of columns of the + * vector). * @param b a matrix of the same dimension as this */ void add_scaled(const BatchLinOp* alpha, const BatchLinOp* b) @@ -306,11 +305,10 @@ class BatchVector * Adds `a` scaled by `alpha` to the vector scaled by `beta`: * this <- alpha * a + beta * this. * - * @param alpha If alpha is 1x1 BatchVector matrix, the entire matrix a is - * scaled by alpha. If it is a BatchVector row vector of - * values, then i-th column of a is scaled with the i-th - * element of alpha (the number of columns of alpha has to - * match the number of columns of a). + * @param alpha If alpha is 1x1 BatchMultiVector matrix, the entire matrix + * a is scaled by alpha. If it is a BatchMultiVector row vector of values, + * then i-th column of a is scaled with the i-th element of alpha (the + * number of columns of alpha has to match the number of columns of a). * @param a a matrix of the same dimension as this. * @param beta Scalar(s), of the same size as alpha, to multiply this * matrix. @@ -323,10 +321,10 @@ class BatchVector * corresponding entry in `b`. If the vector has complex value_type, then * the conjugate of this is taken. * - * @param b a BatchVector matrix of same dimension as this - * @param result a BatchVector row vector, used to store the dot product - * (the number of column in the vector must match the number - * of columns of this) + * @param b a BatchMultiVector matrix of same dimension as this + * @param result a BatchMultiVector row vector, used to store the dot + * product (the number of column in the vector must match the number of + * columns of this) */ void compute_dot(const BatchLinOp* b, BatchLinOp* result) const { @@ -338,7 +336,7 @@ class BatchVector /** * Computes the Euclidean (L^2) norm of each matrix in this batch. * - * @param result a BatchVector row vector, used to store the norm + * @param result a BatchMultiVector row vector, used to store the norm * (the number of columns in the vector must match the number * of columns of this) */ @@ -359,14 +357,14 @@ class BatchVector * (if it resides on the same executor as the vector) or a copy of * the array on the correct executor. */ - static std::unique_ptr create_const( + static std::unique_ptr create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values, const batch_stride& strides) { // cast const-ness away, but return a const object afterwards, // so we can ensure that no modifications take place. - return std::unique_ptr(new BatchVector{ + return std::unique_ptr(new BatchMultiVector{ exec, sizes, gko::detail::array_const_cast(std::move(values)), strides}); } @@ -454,21 +452,21 @@ class BatchVector protected: /** - * Creates an uninitialized BatchVector matrix of the specified size. + * Creates an uninitialized BatchMultiVector matrix of the specified size. 
* * @param exec Executor associated to the vector * @param size size of the vector */ - BatchVector(std::shared_ptr exec, - const batch_dim<2>& size = batch_dim<2>{}) - : BatchVector(std::move(exec), size, - size.get_num_batch_entries() > 0 - ? extract_nth_dim(1, size) - : batch_stride{}) + BatchMultiVector(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}) + : BatchMultiVector(std::move(exec), size, + size.get_num_batch_entries() > 0 + ? extract_nth_dim(1, size) + : batch_stride{}) {} /** - * Creates an uninitialized BatchVector matrix of the specified size. + * Creates an uninitialized BatchMultiVector matrix of the specified size. * * @param exec Executor associated to the vector * @param size size of the batch matrices in a batch_dim object @@ -476,9 +474,9 @@ class BatchVector * elements of two consecutive rows, expressed as the * number of matrix elements) */ - BatchVector(std::shared_ptr exec, const batch_dim<2>& size, - const batch_stride& stride) - : EnableBatchLinOp(exec, size), + BatchMultiVector(std::shared_ptr exec, + const batch_dim<2>& size, const batch_stride& stride) + : EnableBatchLinOp(exec, size), values_(exec, compute_batch_mem(size, stride)), stride_(stride) { @@ -487,8 +485,8 @@ class BatchVector } /** - * Creates a BatchVector matrix from an already allocated (and initialized) - * array. + * Creates a BatchMultiVector matrix from an already allocated (and + * initialized) array. * * @tparam ValuesArray type of array of values * @@ -504,9 +502,10 @@ class BatchVector * original array data will not be used in the vector. */ template - BatchVector(std::shared_ptr exec, const batch_dim<2>& size, - ValuesArray&& values, const batch_stride& stride) - : EnableBatchLinOp(exec, size), + BatchMultiVector(std::shared_ptr exec, + const batch_dim<2>& size, ValuesArray&& values, + const batch_stride& stride) + : EnableBatchLinOp(exec, size), values_{exec, std::forward(values)}, stride_{stride}, num_elems_per_batch_cumul_( @@ -523,14 +522,15 @@ class BatchVector } /** - * Creates a BatchVector matrix from a vector of matrices + * Creates a BatchMultiVector matrix from a vector of matrices * * @param exec Executor associated to the vector * @param matrices The matrices that need to be batched. */ - BatchVector(std::shared_ptr exec, - const std::vector*>& matrices) - : EnableBatchLinOp(exec, get_sizes_from_mtxs(matrices)), + BatchMultiVector(std::shared_ptr exec, + const std::vector*>& matrices) + : EnableBatchLinOp(exec, + get_sizes_from_mtxs(matrices)), stride_{get_strides_from_mtxs(matrices)}, values_(exec, compute_batch_mem(this->get_size(), stride_)) { @@ -547,16 +547,16 @@ class BatchVector } /** - * Creates a BatchVector matrix by duplicating BatchVector matrix + * Creates a BatchMultiVector matrix by duplicating BatchMultiVector matrix * * @param exec Executor associated to the vector * @param num_duplications The number of times to duplicate * @param input the vector to be duplicated. 
*/ - BatchVector(std::shared_ptr exec, - size_type num_duplications, - const BatchVector* input) - : EnableBatchLinOp( + BatchMultiVector(std::shared_ptr exec, + size_type num_duplications, + const BatchMultiVector* input) + : EnableBatchLinOp( exec, gko::batch_dim<2>( input->get_num_batch_entries() * num_duplications, input->get_size().at(0))), @@ -578,15 +578,15 @@ class BatchVector } /** - * Creates a BatchVector matrix by duplicating Dense matrix + * Creates a BatchMultiVector matrix by duplicating Dense matrix * * @param exec Executor associated to the vector * @param num_duplications The number of times to duplicate * @param input the vector to be duplicated. */ - BatchVector(std::shared_ptr exec, - size_type num_duplications, const Dense* input) - : EnableBatchLinOp( + BatchMultiVector(std::shared_ptr exec, + size_type num_duplications, const Dense* input) + : EnableBatchLinOp( exec, gko::batch_dim<2>(num_duplications, input->get_size())), stride_{gko::batch_stride(num_duplications, input->get_stride())}, values_(exec, compute_batch_mem(this->get_size(), stride_)) @@ -604,21 +604,22 @@ class BatchVector } /** - * Creates a BatchVector matrix with the same configuration as the callers - * matrix. + * Creates a BatchMultiVector matrix with the same configuration as the + * callers matrix. * - * @returns a BatchVector matrix with the same configuration as the caller. + * @returns a BatchMultiVector matrix with the same configuration as the + * caller. */ - virtual std::unique_ptr create_with_same_config() const + virtual std::unique_ptr create_with_same_config() const { - return BatchVector::create(this->get_executor(), this->get_size(), - this->get_stride()); + return BatchMultiVector::create(this->get_executor(), this->get_size(), + this->get_stride()); } /** * @copydoc scale(const BatchLinOp *) * - * @note Other implementations of batch_vector should override this + * @note Other implementations of batch_multi_vector should override this * function instead of scale(const BatchLinOp *alpha). */ virtual void scale_impl(const BatchLinOp* alpha); @@ -626,7 +627,7 @@ class BatchVector /** * @copydoc add_scaled(const BatchLinOp *, const BatchLinOp *) * - * @note Other implementations of batch_vector should override this + * @note Other implementations of batch_multi_vector should override this * function instead of add_scale(const BatchLinOp *alpha, const BatchLinOp * *b). */ @@ -635,7 +636,7 @@ class BatchVector /** * @copydoc compute_dot(const BatchLinOp *, BatchLinOp *) const * - * @note Other implementations of batch_vector should override this + * @note Other implementations of batch_multi_vector should override this * function instead of compute_dot(const BatchLinOp *b, BatchLinOp *result). */ virtual void compute_dot_impl(const BatchLinOp* b, @@ -644,7 +645,7 @@ class BatchVector /** * @copydoc compute_norm2(BatchLinOp *) const * - * @note Other implementations of batch_vector should override this + * @note Other implementations of batch_multi_vector should override this * function instead of compute_norm2(BatchLinOp *result). */ virtual void compute_norm2_impl(BatchLinOp* result) const; @@ -669,9 +670,6 @@ class BatchVector }; -} // namespace matrix - - /** * Creates and initializes a batch of column-vectors. * @@ -700,7 +698,7 @@ std::unique_ptr batch_initialize( vals, std::shared_ptr exec, TArgs&&... 
create_args) { - using batch_vector = matrix::BatchVector; + using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = vals.size(); std::vector num_rows(num_batch_entries); std::vector> sizes(num_batch_entries); @@ -712,7 +710,7 @@ std::unique_ptr batch_initialize( } auto b_size = batch_dim<2>(sizes); auto b_stride = batch_stride(stride); - auto tmp = batch_vector::create(exec->get_master(), b_size, b_stride); + auto tmp = batch_multi_vector::create(exec->get_master(), b_size, b_stride); size_type batch = 0; for (const auto& b : vals) { size_type idx = 0; @@ -789,7 +787,7 @@ std::unique_ptr batch_initialize( vals, std::shared_ptr exec, TArgs&&... create_args) { - using batch_vector = matrix::BatchVector; + using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = vals.size(); std::vector num_rows(num_batch_entries); std::vector num_cols(num_batch_entries); @@ -803,7 +801,7 @@ std::unique_ptr batch_initialize( } auto b_size = batch_dim<2>(sizes); auto b_stride = batch_stride(stride); - auto tmp = batch_vector::create(exec->get_master(), b_size, b_stride); + auto tmp = batch_multi_vector::create(exec->get_master(), b_size, b_stride); size_type batch = 0; for (const auto& b : vals) { size_type ridx = 0; @@ -894,7 +892,7 @@ std::unique_ptr batch_initialize( std::initializer_list vals, std::shared_ptr exec, TArgs&&... create_args) { - using batch_vector = matrix::BatchVector; + using batch_multi_vector = BatchMultiVector; std::vector num_rows(num_vectors); std::vector> sizes(num_vectors); for (size_type b = 0; b < num_vectors; ++b) { @@ -903,7 +901,7 @@ std::unique_ptr batch_initialize( } auto b_size = batch_dim<2>(sizes); auto b_stride = batch_stride(stride); - auto tmp = batch_vector::create(exec->get_master(), b_size, b_stride); + auto tmp = batch_multi_vector::create(exec->get_master(), b_size, b_stride); for (size_type batch = 0; batch < num_vectors; batch++) { size_type idx = 0; for (const auto& elem : vals) { @@ -982,7 +980,7 @@ std::unique_ptr batch_initialize( vals, std::shared_ptr exec, TArgs&&... 
create_args) { - using batch_vector = matrix::BatchVector; + using batch_multi_vector = BatchMultiVector; std::vector> sizes(num_matrices); const size_type num_rows = vals.size(); for (size_type b = 0; b < num_matrices; ++b) { @@ -992,7 +990,7 @@ std::unique_ptr batch_initialize( GKO_ASSERT(blockit->size() == num_cols); } } - auto tmp = batch_vector::create(exec->get_master(), sizes, stride); + auto tmp = batch_multi_vector::create(exec->get_master(), sizes, stride); for (size_type batch = 0; batch < num_matrices; batch++) { size_type ridx = 0; for (const auto& row : vals) { @@ -1049,4 +1047,4 @@ std::unique_ptr batch_initialize( } // namespace gko -#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_VECTOR_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_BATCH_MULTI_VECTOR_HPP_ diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index abb50ffc09f..02248983385 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(ginkgo_omp $ "") list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) target_sources(ginkgo_omp PRIVATE + base/batch_multi_vector_kernels.cpp base/device_matrix_data_kernels.cpp base/index_set_kernels.cpp base/scoped_device_id.cpp @@ -20,7 +21,6 @@ target_sources(ginkgo_omp factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp - matrix/batch_vector_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/omp/matrix/batch_vector_kernels.cpp b/omp/base/batch_multi_vector_kernels.cpp similarity index 77% rename from omp/matrix/batch_vector_kernels.cpp rename to omp/base/batch_multi_vector_kernels.cpp index 7ade2fcca23..96b6716f0ba 100644 --- a/omp/matrix/batch_vector_kernels.cpp +++ b/omp/base/batch_multi_vector_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_vector_kernels.hpp" +#include "core/base/batch_multi_vector_kernels.hpp" #include @@ -49,20 +49,20 @@ namespace gko { namespace kernels { namespace omp { /** - * @brief The BatchVector matrix format namespace. - * @ref BatchVector - * @ingroup batch_vector + * @brief The BatchMultiVector matrix format namespace. 
+ * @ref BatchMultiVector + * @ingroup batch_multi_vector */ -namespace batch_vector { +namespace batch_multi_vector { -#include "reference/matrix/batch_vector_kernels.hpp.inc" +#include "reference/matrix/batch_multi_vector_kernels.hpp.inc" template void scale(std::shared_ptr exec, - const matrix::BatchVector* const alpha, - matrix::BatchVector* const x) + const BatchMultiVector* const alpha, + BatchMultiVector* const x) { const auto x_ub = host::get_batch_struct(x); const auto alpha_ub = host::get_batch_struct(alpha); @@ -74,14 +74,15 @@ void scale(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); template void add_scaled(std::shared_ptr exec, - const matrix::BatchVector* const alpha, - const matrix::BatchVector* const x, - matrix::BatchVector* const y) + const BatchMultiVector* const alpha, + const BatchMultiVector* const x, + BatchMultiVector* const y) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -95,14 +96,15 @@ void add_scaled(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::BatchVector* const x, - const matrix::BatchVector* const y, - matrix::BatchVector* const result) + const BatchMultiVector* const x, + const BatchMultiVector* const y, + BatchMultiVector* const result) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -118,13 +120,13 @@ void compute_dot(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL); + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::BatchVector* const x, - matrix::BatchVector>* const result) + const BatchMultiVector* const x, + BatchMultiVector>* const result) { const auto x_ub = host::get_batch_struct(x); const auto res_ub = host::get_batch_struct(result); @@ -138,13 +140,13 @@ void compute_norm2(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL); + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); template void copy(std::shared_ptr exec, - const matrix::BatchVector* x, - matrix::BatchVector* result) + const BatchMultiVector* x, + BatchMultiVector* result) { const auto x_ub = host::get_batch_struct(x); const auto result_ub = host::get_batch_struct(result); @@ -156,10 +158,10 @@ void copy(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); -} // namespace batch_vector +} // namespace batch_multi_vector } // namespace omp } // namespace kernels } // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 224fb70dc0e..074d5efe818 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -1,6 +1,7 @@ add_library(ginkgo_reference $ "") target_sources(ginkgo_reference PRIVATE + base/batch_multi_vector_kernels.cpp base/device_matrix_data_kernels.cpp base/index_set_kernels.cpp base/scoped_device_id.cpp @@ -23,7 +24,6 @@ target_sources(ginkgo_reference factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp - 
matrix/batch_vector_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/reference/matrix/batch_vector_kernels.cpp b/reference/base/batch_multi_vector_kernels.cpp similarity index 77% rename from reference/matrix/batch_vector_kernels.cpp rename to reference/base/batch_multi_vector_kernels.cpp index 01748c6e524..27f6539b9eb 100644 --- a/reference/matrix/batch_vector_kernels.cpp +++ b/reference/base/batch_multi_vector_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_vector_kernels.hpp" +#include "core/base/batch_multi_vector_kernels.hpp" #include @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/matrix/batch_struct.hpp" +#include "core/base/batch_struct.hpp" #include "reference/matrix/batch_struct.hpp" @@ -49,20 +49,20 @@ namespace gko { namespace kernels { namespace reference { /** - * @brief The BatchVector matrix format namespace. - * @ref BatchVector - * @ingroup batch_vector + * @brief The BatchMultiVector matrix format namespace. + * @ref BatchMultiVector + * @ingroup batch_multi_vector */ -namespace batch_vector { +namespace batch_multi_vector { -#include "reference/matrix/batch_vector_kernels.hpp.inc" +#include "reference/matrix/batch_multi_vector_kernels.hpp.inc" template void scale(std::shared_ptr exec, - const matrix::BatchVector* alpha, - matrix::BatchVector* x) + const BatchMultiVector* alpha, + BatchMultiVector* x) { const auto x_ub = host::get_batch_struct(x); const auto alpha_ub = host::get_batch_struct(alpha); @@ -73,14 +73,15 @@ void scale(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_SCALE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); template void add_scaled(std::shared_ptr exec, - const matrix::BatchVector* alpha, - const matrix::BatchVector* x, - matrix::BatchVector* y) + const BatchMultiVector* alpha, + const BatchMultiVector* x, + BatchMultiVector* y) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -93,14 +94,15 @@ void add_scaled(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_ADD_SCALED_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::BatchVector* x, - const matrix::BatchVector* y, - matrix::BatchVector* result) + const BatchMultiVector* x, + const BatchMultiVector* y, + BatchMultiVector* result) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -115,13 +117,13 @@ void compute_dot(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_VECTOR_COMPUTE_DOT_KERNEL); + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::BatchVector* x, - matrix::BatchVector>* result) + const BatchMultiVector* x, + BatchMultiVector>* result) { const auto x_ub = host::get_batch_struct(x); const auto res_ub = host::get_batch_struct(result); @@ -134,13 +136,13 @@ void compute_norm2(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_VECTOR_COMPUTE_NORM2_KERNEL); + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); template void 
copy(std::shared_ptr exec, - const matrix::BatchVector* x, - matrix::BatchVector* result) + const BatchMultiVector* x, + BatchMultiVector* result) { const auto x_ub = host::get_batch_struct(x); const auto result_ub = host::get_batch_struct(result); @@ -151,10 +153,10 @@ void copy(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_VECTOR_COPY_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); -} // namespace batch_vector +} // namespace batch_multi_vector } // namespace reference } // namespace kernels } // namespace gko diff --git a/reference/matrix/batch_vector_kernels.hpp.inc b/reference/base/batch_multi_vector_kernels.hpp.inc similarity index 80% rename from reference/matrix/batch_vector_kernels.hpp.inc rename to reference/base/batch_multi_vector_kernels.hpp.inc index eb4a8cfab2a..2f9c88e53f1 100644 --- a/reference/matrix/batch_vector_kernels.hpp.inc +++ b/reference/base/batch_multi_vector_kernels.hpp.inc @@ -32,9 +32,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template inline void matvec_kernel( - const gko::batch_vector::BatchEntry& a, - const gko::batch_vector::BatchEntry& b, - const gko::batch_vector::BatchEntry& c) + const gko::batch_multi_vector::BatchEntry& a, + const gko::batch_multi_vector::BatchEntry& b, + const gko::batch_multi_vector::BatchEntry& c) { for (int row = 0; row < c.num_rows; ++row) { for (int col = 0; col < c.num_rhs; ++col) { @@ -57,9 +57,10 @@ inline void matvec_kernel( template inline void advanced_matvec_kernel( const ValueType alpha, - const gko::batch_vector::BatchEntry& a, - const gko::batch_vector::BatchEntry& b, - const ValueType beta, const gko::batch_vector::BatchEntry& c) + const gko::batch_multi_vector::BatchEntry& a, + const gko::batch_multi_vector::BatchEntry& b, + const ValueType beta, + const gko::batch_multi_vector::BatchEntry& c) { if (beta != gko::zero()) { for (int row = 0; row < c.num_rows; ++row) { @@ -88,8 +89,9 @@ inline void advanced_matvec_kernel( template -inline void scale(const gko::batch_vector::BatchEntry& alpha, - const gko::batch_vector::BatchEntry& x) +inline void scale( + const gko::batch_multi_vector::BatchEntry& alpha, + const gko::batch_multi_vector::BatchEntry& x) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -109,9 +111,9 @@ inline void scale(const gko::batch_vector::BatchEntry& alpha, template inline void add_scaled( - const gko::batch_vector::BatchEntry& alpha, - const gko::batch_vector::BatchEntry& x, - const gko::batch_vector::BatchEntry& y) + const gko::batch_multi_vector::BatchEntry& alpha, + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& y) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -133,10 +135,10 @@ inline void add_scaled( template inline void add_scale( - const gko::batch_vector::BatchEntry& alpha, - const gko::batch_vector::BatchEntry& x, - const gko::batch_vector::BatchEntry& beta, - const gko::batch_vector::BatchEntry& y) + const gko::batch_multi_vector::BatchEntry& alpha, + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& beta, + const gko::batch_multi_vector::BatchEntry& y) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -160,8 +162,9 @@ inline void add_scale( template inline void compute_norm2( - const gko::batch_vector::BatchEntry& x, - const gko::batch_vector::BatchEntry>& result) + const gko::batch_multi_vector::BatchEntry& x, + const 
gko::batch_multi_vector::BatchEntry>& + result) { for (int j = 0; j < x.num_rhs; ++j) { result.values[j] = gko::zero>(); @@ -185,8 +188,8 @@ inline void compute_norm2( */ template inline void batch_scale( - const gko::batch_vector::BatchEntry& diag_vec, - const gko::batch_vector::BatchEntry& a) + const gko::batch_multi_vector::BatchEntry& diag_vec, + const gko::batch_multi_vector::BatchEntry& a) { for (int i_row = 0; i_row < a.num_rows; i_row++) { const ValueType scale = diag_vec.values[i_row]; @@ -217,8 +220,8 @@ inline void batch_scale(const int nrows, const int ncols, * and stride set. */ template -inline void copy(const gko::batch_vector::BatchEntry& in, - const gko::batch_vector::BatchEntry& out) +inline void copy(const gko::batch_multi_vector::BatchEntry& in, + const gko::batch_multi_vector::BatchEntry& out) { for (int iz = 0; iz < in.num_rows * in.num_rhs; iz++) { const int i = iz / in.num_rhs; @@ -230,9 +233,9 @@ inline void copy(const gko::batch_vector::BatchEntry& in, template inline void compute_dot_product( - const gko::batch_vector::BatchEntry& x, - const gko::batch_vector::BatchEntry& y, - const gko::batch_vector::BatchEntry& result) + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& y, + const gko::batch_multi_vector::BatchEntry& result) { for (int c = 0; c < result.num_rhs; c++) { result.values[c] = gko::zero(); @@ -249,8 +252,8 @@ inline void compute_dot_product( template inline void copy( - const gko::batch_vector::BatchEntry& source_entry, - const gko::batch_vector::BatchEntry& destination_entry, + const gko::batch_multi_vector::BatchEntry& source_entry, + const gko::batch_multi_vector::BatchEntry& destination_entry, const gko::uint32& converged) { for (int r = 0; r < source_entry.num_rows; r++) { @@ -270,9 +273,9 @@ inline void copy( template inline void add_scaled( - const gko::batch_vector::BatchEntry& alpha, - const gko::batch_vector::BatchEntry& x, - const gko::batch_vector::BatchEntry& y, + const gko::batch_multi_vector::BatchEntry& alpha, + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& y, const gko::uint32& converged) { if (alpha.num_rhs == 1) { @@ -308,8 +311,9 @@ inline void add_scaled( template inline void compute_norm2( - const gko::batch_vector::BatchEntry& x, - const gko::batch_vector::BatchEntry>& result, + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry>& + result, const gko::uint32& converged) { for (int j = 0; j < x.num_rhs; ++j) { @@ -346,9 +350,9 @@ inline void compute_norm2( template inline void compute_dot_product( - const gko::batch_vector::BatchEntry& x, - const gko::batch_vector::BatchEntry& y, - const gko::batch_vector::BatchEntry& result, + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& y, + const gko::batch_multi_vector::BatchEntry& result, const gko::uint32& converged) { for (int c = 0; c < result.num_rhs; c++) { @@ -379,7 +383,7 @@ inline void compute_dot_product( template inline void add_scaled_identity( const ValueType& a, const ValueType& b, - const gko::batch_vector::BatchEntry& mat) + const gko::batch_multi_vector::BatchEntry& mat) { for (int i = 0; i < mat.num_rows; i++) { for (int j = 0; j < mat.num_rhs; j++) { diff --git a/reference/matrix/batch_struct.hpp b/reference/base/batch_struct.hpp similarity index 88% rename from reference/matrix/batch_struct.hpp rename to reference/base/batch_struct.hpp index 0c07956d9d6..32c90db9d7f 100644 --- 
a/reference/matrix/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -34,11 +34,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ -#include "core/matrix/batch_struct.hpp" +#include "core/base/batch_struct.hpp" +#include #include -#include namespace gko { @@ -63,8 +63,8 @@ namespace host { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_vector::UniformBatch get_batch_struct( - const matrix::BatchVector* const op) +inline gko::batch_multi_vector::UniformBatch get_batch_struct( + const BatchMultiVector* const op) { return { op->get_const_values(), @@ -80,8 +80,8 @@ inline gko::batch_vector::UniformBatch get_batch_struct( * Generates a uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_vector::UniformBatch get_batch_struct( - matrix::BatchVector* const op) +inline gko::batch_multi_vector::UniformBatch get_batch_struct( + BatchMultiVector* const op) { return { op->get_values(), @@ -98,8 +98,8 @@ inline gko::batch_vector::UniformBatch get_batch_struct( * that may be null. */ template -inline gko::batch_vector::UniformBatch maybe_null_batch_struct( - const matrix::BatchVector* const op) +inline gko::batch_multi_vector::UniformBatch +maybe_null_batch_struct(const BatchMultiVector* const op) { if (op) { return {op->get_const_values(), op->get_num_batch_entries(), diff --git a/reference/test/matrix/batch_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp similarity index 87% rename from reference/test/matrix/batch_vector_kernels.cpp rename to reference/test/base/batch_multi_vector_kernels.cpp index e8aaad8d584..f2062a4e393 100644 --- a/reference/test/matrix/batch_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include #include @@ -41,16 +41,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include +#include +#include #include #include #include -#include -#include -#include #include -#include "core/matrix/batch_vector_kernels.hpp" +#include "core/base/batch_multi_vector_kernels.hpp" #include "core/test/utils.hpp" @@ -58,15 +58,15 @@ namespace { template -class BatchVector : public ::testing::Test { +class BatchMultiVector : public ::testing::Test { protected: using value_type = T; using size_type = gko::size_type; - using Mtx = gko::matrix::BatchVector; + using Mtx = gko::BatchMultiVector; using DenseMtx = gko::matrix::Dense; using ComplexMtx = gko::to_complex; using RealMtx = gko::remove_complex; - BatchVector() + BatchMultiVector() : exec(gko::ReferenceExecutor::create()), mtx_0(gko::batch_initialize( {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, @@ -138,10 +138,10 @@ class BatchVector : public ::testing::Test { }; -TYPED_TEST_SUITE(BatchVector, gko::test::ValueTypes); +TYPED_TEST_SUITE(BatchMultiVector, gko::test::ValueTypes); -TYPED_TEST(BatchVector, AppliesToBatchVector) +TYPED_TEST(BatchMultiVector, AppliesToBatchMultiVector) { using T = typename TestFixture::value_type; this->mtx_1->apply(this->mtx_2.get(), this->mtx_3.get()); @@ -155,7 +155,7 @@ TYPED_TEST(BatchVector, AppliesToBatchVector) } -TYPED_TEST(BatchVector, AppliesLinearCombinationToBatchVector) +TYPED_TEST(BatchMultiVector, AppliesLinearCombinationToBatchMultiVector) { using Mtx = typename TestFixture::Mtx; using DenseMtx = typename TestFixture::DenseMtx; @@ -180,7 +180,7 @@ TYPED_TEST(BatchVector, AppliesLinearCombinationToBatchVector) } -TYPED_TEST(BatchVector, ApplyFailsOnWrongInnerDimension) +TYPED_TEST(BatchMultiVector, ApplyFailsOnWrongInnerDimension) { using Mtx = typename TestFixture::Mtx; auto res = Mtx::create( @@ -191,7 +191,7 @@ TYPED_TEST(BatchVector, ApplyFailsOnWrongInnerDimension) } -TYPED_TEST(BatchVector, ApplyFailsForNonUniformBatches) +TYPED_TEST(BatchMultiVector, ApplyFailsForNonUniformBatches) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -212,7 +212,7 @@ TYPED_TEST(BatchVector, ApplyFailsForNonUniformBatches) } -TYPED_TEST(BatchVector, ApplyFailsOnWrongNumberOfRows) +TYPED_TEST(BatchMultiVector, ApplyFailsOnWrongNumberOfRows) { using Mtx = typename TestFixture::Mtx; auto res = Mtx::create( @@ -223,7 +223,7 @@ TYPED_TEST(BatchVector, ApplyFailsOnWrongNumberOfRows) } -TYPED_TEST(BatchVector, ApplyFailsOnWrongNumberOfCols) +TYPED_TEST(BatchMultiVector, ApplyFailsOnWrongNumberOfCols) { using Mtx = typename TestFixture::Mtx; auto res = Mtx::create( @@ -237,7 +237,7 @@ TYPED_TEST(BatchVector, ApplyFailsOnWrongNumberOfCols) } -TYPED_TEST(BatchVector, ScalesData) +TYPED_TEST(BatchMultiVector, ScalesData) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -257,7 +257,7 @@ TYPED_TEST(BatchVector, ScalesData) } -TYPED_TEST(BatchVector, ScalesDataWithScalar) +TYPED_TEST(BatchMultiVector, ScalesDataWithScalar) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -275,7 +275,7 @@ TYPED_TEST(BatchVector, ScalesDataWithScalar) } -TYPED_TEST(BatchVector, ScalesDataWithStride) +TYPED_TEST(BatchMultiVector, ScalesDataWithStride) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -294,7 +294,7 @@ TYPED_TEST(BatchVector, ScalesDataWithStride) } -TYPED_TEST(BatchVector, AddsScaled) +TYPED_TEST(BatchMultiVector, AddsScaled) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -313,7 +313,7 @@ 
TYPED_TEST(BatchVector, AddsScaled) } -TYPED_TEST(BatchVector, AddsScale) +TYPED_TEST(BatchMultiVector, AddsScale) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -337,7 +337,7 @@ TYPED_TEST(BatchVector, AddsScale) } -TYPED_TEST(BatchVector, ConvergenceAddScaled) +TYPED_TEST(BatchMultiVector, ConvergenceAddScaled) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -350,7 +350,7 @@ TYPED_TEST(BatchVector, ConvergenceAddScaled) const int num_rhs = 3; const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_vector::convergence_add_scaled( + gko::kernels::reference::batch_multi_vector::convergence_add_scaled( this->exec, alpha.get(), this->mtx_0.get(), this->mtx_1.get(), converged); @@ -378,7 +378,7 @@ TYPED_TEST(BatchVector, ConvergenceAddScaled) } -TYPED_TEST(BatchVector, AddsScaledWithScalar) +TYPED_TEST(BatchMultiVector, AddsScaledWithScalar) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -396,7 +396,7 @@ TYPED_TEST(BatchVector, AddsScaledWithScalar) } -TYPED_TEST(BatchVector, AddsScaleWithScalar) +TYPED_TEST(BatchMultiVector, AddsScaleWithScalar) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -418,7 +418,7 @@ TYPED_TEST(BatchVector, AddsScaleWithScalar) } -TYPED_TEST(BatchVector, AddScaleWithScalarViaApply) +TYPED_TEST(BatchMultiVector, AddScaleWithScalarViaApply) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -441,7 +441,7 @@ TYPED_TEST(BatchVector, AddScaleWithScalarViaApply) } -TYPED_TEST(BatchVector, ConvergenceAddScaledWithScalar) +TYPED_TEST(BatchMultiVector, ConvergenceAddScaledWithScalar) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -453,7 +453,7 @@ TYPED_TEST(BatchVector, ConvergenceAddScaledWithScalar) const int num_rhs = 3; const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_vector::convergence_add_scaled( + gko::kernels::reference::batch_multi_vector::convergence_add_scaled( this->exec, alpha.get(), this->mtx_0.get(), this->mtx_1.get(), converged); @@ -481,7 +481,7 @@ TYPED_TEST(BatchVector, ConvergenceAddScaledWithScalar) } -TYPED_TEST(BatchVector, AddScaledFailsOnWrongSizes) +TYPED_TEST(BatchMultiVector, AddScaledFailsOnWrongSizes) { using Mtx = typename TestFixture::Mtx; auto alpha = @@ -492,7 +492,7 @@ TYPED_TEST(BatchVector, AddScaledFailsOnWrongSizes) } -TYPED_TEST(BatchVector, AddScaleFailsOnWrongSizes) +TYPED_TEST(BatchMultiVector, AddScaleFailsOnWrongSizes) { using Mtx = typename TestFixture::Mtx; auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); @@ -504,7 +504,7 @@ TYPED_TEST(BatchVector, AddScaleFailsOnWrongSizes) } -TYPED_TEST(BatchVector, AddScaleFailsOnWrongScalarSizes) +TYPED_TEST(BatchMultiVector, AddScaleFailsOnWrongScalarSizes) { using Mtx = typename TestFixture::Mtx; auto alpha = gko::batch_initialize( @@ -517,7 +517,7 @@ TYPED_TEST(BatchVector, AddScaleFailsOnWrongScalarSizes) } -TYPED_TEST(BatchVector, ComputesDot) +TYPED_TEST(BatchMultiVector, ComputesDot) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -536,7 +536,7 @@ TYPED_TEST(BatchVector, ComputesDot) } -TYPED_TEST(BatchVector, ConvergenceComputeDot) +TYPED_TEST(BatchMultiVector, ConvergenceComputeDot) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -555,7 +555,7 @@ 
TYPED_TEST(BatchVector, ConvergenceComputeDot) const int num_rhs = 3; const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_vector::convergence_compute_dot( + gko::kernels::reference::batch_multi_vector::convergence_compute_dot( this->exec, this->mtx_0.get(), this->mtx_1.get(), result.get(), converged); @@ -577,12 +577,12 @@ TYPED_TEST(BatchVector, ConvergenceComputeDot) } -TYPED_TEST(BatchVector, ComputesNorm2) +TYPED_TEST(BatchMultiVector, ComputesNorm2) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using T_nc = gko::remove_complex; - using NormVector = gko::matrix::BatchVector; + using NormVector = gko::BatchMultiVector; auto mtx(gko::batch_initialize( {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, @@ -601,12 +601,12 @@ TYPED_TEST(BatchVector, ComputesNorm2) } -TYPED_TEST(BatchVector, ConvergenceComputeNorm2) +TYPED_TEST(BatchMultiVector, ConvergenceComputeNorm2) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using T_nc = gko::remove_complex; - using NormVector = gko::matrix::BatchVector; + using NormVector = gko::BatchMultiVector; auto mtx(gko::batch_initialize( {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, @@ -628,7 +628,7 @@ TYPED_TEST(BatchVector, ConvergenceComputeNorm2) const int num_rhs = 2; const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_vector::convergence_compute_norm2( + gko::kernels::reference::batch_multi_vector::convergence_compute_norm2( this->exec, mtx.get(), result.get(), converged); EXPECT_EQ(result->at(0, 0, 0), result_clone->at(0, 0, 0)); @@ -639,7 +639,7 @@ TYPED_TEST(BatchVector, ConvergenceComputeNorm2) } -TYPED_TEST(BatchVector, ComputDotFailsOnWrongInputSize) +TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; auto result = @@ -651,7 +651,7 @@ TYPED_TEST(BatchVector, ComputDotFailsOnWrongInputSize) } -TYPED_TEST(BatchVector, ComputDotFailsOnWrongResultSize) +TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongResultSize) { using Mtx = typename TestFixture::Mtx; auto result = @@ -667,22 +667,22 @@ TYPED_TEST(BatchVector, ComputDotFailsOnWrongResultSize) } -TYPED_TEST(BatchVector, CopiesData) +TYPED_TEST(BatchMultiVector, CopiesData) { - gko::kernels::reference::batch_vector::copy(this->exec, this->mtx_0.get(), - this->mtx_1.get()); + gko::kernels::reference::batch_multi_vector::copy( + this->exec, this->mtx_0.get(), this->mtx_1.get()); GKO_ASSERT_BATCH_MTX_NEAR(this->mtx_1.get(), this->mtx_0.get(), 0.); } -TYPED_TEST(BatchVector, ConvergenceCopyData) +TYPED_TEST(BatchMultiVector, ConvergenceCopyData) { auto umtx_0 = this->mtx_0->unbatch(); const int num_rhs = 3; const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_vector::convergence_copy( + gko::kernels::reference::batch_multi_vector::convergence_copy( this->exec, this->mtx_0.get(), this->mtx_1.get(), converged); auto mtx_10_clone = gko::clone(this->mtx_10); @@ -706,7 +706,7 @@ TYPED_TEST(BatchVector, ConvergenceCopyData) } -TYPED_TEST(BatchVector, BatchScale) +TYPED_TEST(BatchMultiVector, BatchScale) { using T = typename TestFixture::value_type; using Mtx = typename TestFixture::Mtx; @@ -722,8 +722,8 @@ TYPED_TEST(BatchVector, BatchScale) auto rght(gko::batch_diagonal_initialize( I>{I{-0.5, -2.0}, I{2.0, 0.25}}, this->exec)); - 
gko::kernels::reference::batch_vector::batch_scale(this->exec, left.get(), - rght.get(), mtx.get()); + gko::kernels::reference::batch_multi_vector::batch_scale( + this->exec, left.get(), rght.get(), mtx.get()); EXPECT_EQ(mtx->at(0, 0, 0), T{-0.5}); EXPECT_EQ(mtx->at(0, 1, 0), T{-2.0}); @@ -741,14 +741,14 @@ TYPED_TEST(BatchVector, BatchScale) } -TYPED_TEST(BatchVector, ConvertsToPrecision) +TYPED_TEST(BatchMultiVector, ConvertsToPrecision) { - using BatchVector = typename TestFixture::Mtx; + using BatchMultiVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchVector = typename gko::matrix::BatchVector; - auto tmp = OtherBatchVector::create(this->exec); - auto res = BatchVector::create(this->exec); + using OtherBatchMultiVector = typename gko::BatchMultiVector; + auto tmp = OtherBatchMultiVector::create(this->exec); + auto res = BatchMultiVector::create(this->exec); // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? gko::remove_complex{0} @@ -764,14 +764,14 @@ TYPED_TEST(BatchVector, ConvertsToPrecision) } -TYPED_TEST(BatchVector, MovesToPrecision) +TYPED_TEST(BatchMultiVector, MovesToPrecision) { - using BatchVector = typename TestFixture::Mtx; + using BatchMultiVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchVector = typename gko::matrix::BatchVector; - auto tmp = OtherBatchVector::create(this->exec); - auto res = BatchVector::create(this->exec); + using OtherBatchMultiVector = typename gko::BatchMultiVector; + auto tmp = OtherBatchMultiVector::create(this->exec); + auto res = BatchMultiVector::create(this->exec); // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? 
gko::remove_complex{0} @@ -787,7 +787,7 @@ TYPED_TEST(BatchVector, MovesToPrecision) } -TYPED_TEST(BatchVector, ConvertsToCsr32) +TYPED_TEST(BatchMultiVector, ConvertsToCsr32) { using T = typename TestFixture::value_type; using BatchCsr = typename gko::matrix::BatchCsr; @@ -824,7 +824,7 @@ TYPED_TEST(BatchVector, ConvertsToCsr32) } -TYPED_TEST(BatchVector, MovesToCsr32) +TYPED_TEST(BatchMultiVector, MovesToCsr32) { using T = typename TestFixture::value_type; using BatchCsr = typename gko::matrix::BatchCsr; @@ -861,14 +861,14 @@ TYPED_TEST(BatchVector, MovesToCsr32) } -TYPED_TEST(BatchVector, ConvertsEmptyToPrecision) +TYPED_TEST(BatchMultiVector, ConvertsEmptyToPrecision) { - using BatchVector = typename TestFixture::Mtx; + using BatchMultiVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchVector = typename gko::matrix::BatchVector; - auto empty = OtherBatchVector::create(this->exec); - auto res = BatchVector::create(this->exec); + using OtherBatchMultiVector = typename gko::BatchMultiVector; + auto empty = OtherBatchMultiVector::create(this->exec); + auto res = BatchMultiVector::create(this->exec); empty->convert_to(res.get()); @@ -876,14 +876,14 @@ TYPED_TEST(BatchVector, ConvertsEmptyToPrecision) } -TYPED_TEST(BatchVector, MovesEmptyToPrecision) +TYPED_TEST(BatchMultiVector, MovesEmptyToPrecision) { - using BatchVector = typename TestFixture::Mtx; + using BatchMultiVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchVector = typename gko::matrix::BatchVector; - auto empty = OtherBatchVector::create(this->exec); - auto res = BatchVector::create(this->exec); + using OtherBatchMultiVector = typename gko::BatchMultiVector; + auto empty = OtherBatchMultiVector::create(this->exec); + auto res = BatchMultiVector::create(this->exec); empty->move_to(res.get()); @@ -891,12 +891,12 @@ TYPED_TEST(BatchVector, MovesEmptyToPrecision) } -TYPED_TEST(BatchVector, ConvertsEmptyMatrixToCsr) +TYPED_TEST(BatchMultiVector, ConvertsEmptyMatrixToCsr) { - using BatchVector = typename TestFixture::Mtx; + using BatchMultiVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using BatchCsr = typename gko::matrix::BatchCsr; - auto empty = BatchVector::create(this->exec); + auto empty = BatchMultiVector::create(this->exec); auto res = BatchCsr::create(this->exec); empty->convert_to(res.get()); @@ -907,12 +907,12 @@ TYPED_TEST(BatchVector, ConvertsEmptyMatrixToCsr) } -TYPED_TEST(BatchVector, MovesEmptyMatrixToCsr) +TYPED_TEST(BatchMultiVector, MovesEmptyMatrixToCsr) { - using BatchVector = typename TestFixture::Mtx; + using BatchMultiVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using BatchCsr = typename gko::matrix::BatchCsr; - auto empty = BatchVector::create(this->exec); + auto empty = BatchMultiVector::create(this->exec); auto res = BatchCsr::create(this->exec); empty->move_to(res.get()); @@ -923,7 +923,7 @@ TYPED_TEST(BatchVector, MovesEmptyMatrixToCsr) } -TYPED_TEST(BatchVector, ConvertsToBatchDiagonal) +TYPED_TEST(BatchMultiVector, ConvertsToBatchDiagonal) { using BDense = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -946,7 +946,7 @@ TYPED_TEST(BatchVector, ConvertsToBatchDiagonal) } -TYPED_TEST(BatchVector, MovesToBatchDiagonal) +TYPED_TEST(BatchMultiVector, MovesToBatchDiagonal) { using BDense = typename TestFixture::Mtx; using T = typename 
TestFixture::value_type; @@ -967,13 +967,13 @@ TYPED_TEST(BatchVector, MovesToBatchDiagonal) } -TYPED_TEST(BatchVector, SquareMatrixIsTransposable) +TYPED_TEST(BatchMultiVector, SquareMatrixIsTransposable) { using Mtx = typename TestFixture::Mtx; auto trans = this->mtx_4->transpose(); - auto trans_as_batch_vector = static_cast(trans.get()); + auto trans_as_batch_multi_vector = static_cast(trans.get()); - auto utb = trans_as_batch_vector->unbatch(); + auto utb = trans_as_batch_multi_vector->unbatch(); GKO_ASSERT_MTX_NEAR(utb[0].get(), l({{1.0, 6.0, 6.0}, {1.5, 1.0, 1.0}, {3.0, 5.0, 5.5}}), r::value); @@ -983,13 +983,13 @@ TYPED_TEST(BatchVector, SquareMatrixIsTransposable) } -TYPED_TEST(BatchVector, NonSquareMatrixIsTransposable) +TYPED_TEST(BatchMultiVector, NonSquareMatrixIsTransposable) { using Mtx = typename TestFixture::Mtx; auto trans = this->mtx_5->transpose(); - auto trans_as_batch_vector = static_cast(trans.get()); + auto trans_as_batch_multi_vector = static_cast(trans.get()); - auto utb = trans_as_batch_vector->unbatch(); + auto utb = trans_as_batch_multi_vector->unbatch(); GKO_ASSERT_MTX_NEAR(utb[0].get(), l({{1.0, 6.0, 7.0}, {1.5, 1.0, -4.5}}), r::value); GKO_ASSERT_MTX_NEAR(utb[1].get(), l({{2.0, 1.0, 4.0}, {-2.0, 3.0, 3.0}}), @@ -997,7 +997,7 @@ TYPED_TEST(BatchVector, NonSquareMatrixIsTransposable) } -TYPED_TEST(BatchVector, SquareMatrixAddScaledIdentity) +TYPED_TEST(BatchMultiVector, SquareMatrixAddScaledIdentity) { using T = typename TestFixture::value_type; using Mtx = typename TestFixture::Mtx; diff --git a/test/base/CMakeLists.txt b/test/base/CMakeLists.txt index 80026fdabe1..a80be354878 100644 --- a/test/base/CMakeLists.txt +++ b/test/base/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_common_and_reference_test(batch_multi_vector_kernels) ginkgo_create_common_and_reference_test(device_matrix_data_kernels) ginkgo_create_common_device_test(kernel_launch_generic) ginkgo_create_common_and_reference_test(executor) diff --git a/test/matrix/batch_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp similarity index 81% rename from test/matrix/batch_vector_kernels.cpp rename to test/base/batch_multi_vector_kernels.cpp index 150f02a3772..e16607db844 100644 --- a/test/matrix/batch_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_vector_kernels.hpp" +#include "core/base/batch_multi_vector_kernels.hpp" #include @@ -40,9 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include -#include -#include #include "core/test/utils.hpp" @@ -53,14 +52,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef GKO_COMPILING_DPCPP -class BatchVector : public CommonTestFixture { +class BatchMultiVector : public CommonTestFixture { protected: using vtype = double; - using Mtx = gko::matrix::BatchVector; - using NormVector = gko::matrix::BatchVector>; - using ComplexMtx = gko::matrix::BatchVector>; + using Mtx = gko::BatchMultiVector; + using NormVector = gko::BatchMultiVector>; + using ComplexMtx = gko::BatchMultiVector>; - BatchVector() : rand_engine(15) {} + BatchMultiVector() : rand_engine(15) {} template std::unique_ptr gen_mtx(const size_t batchsize, int num_rows, @@ -145,7 +144,7 @@ class BatchVector : public CommonTestFixture { }; -TEST_F(BatchVector, SingleVectorAppyIsEquivalentToRef) +TEST_F(BatchMultiVector, SingleVectorAppyIsEquivalentToRef) { set_up_apply_data(1); @@ -156,7 +155,7 @@ TEST_F(BatchVector, SingleVectorAppyIsEquivalentToRef) } -TEST_F(BatchVector, SingleVectorAdvancedAppyIsEquivalentToRef) +TEST_F(BatchMultiVector, SingleVectorAdvancedAppyIsEquivalentToRef) { set_up_apply_data(1); @@ -167,7 +166,7 @@ TEST_F(BatchVector, SingleVectorAdvancedAppyIsEquivalentToRef) } -TEST_F(BatchVector, SingleVectorAddScaledIsEquivalentToRef) +TEST_F(BatchMultiVector, SingleVectorAddScaledIsEquivalentToRef) { set_up_vector_data(1); @@ -178,7 +177,7 @@ TEST_F(BatchVector, SingleVectorAddScaledIsEquivalentToRef) } -TEST_F(BatchVector, SingleVectorAddScaleIsEquivalentToRef) +TEST_F(BatchMultiVector, SingleVectorAddScaleIsEquivalentToRef) { set_up_vector_data(1); @@ -189,7 +188,7 @@ TEST_F(BatchVector, SingleVectorAddScaleIsEquivalentToRef) } -TEST_F(BatchVector, MultipleVectorAddScaledIsEquivalentToRef) +TEST_F(BatchMultiVector, MultipleVectorAddScaledIsEquivalentToRef) { set_up_vector_data(20); @@ -200,7 +199,7 @@ TEST_F(BatchVector, MultipleVectorAddScaledIsEquivalentToRef) } -TEST_F(BatchVector, MultipleVectorAddScaleIsEquivalentToRef) +TEST_F(BatchMultiVector, MultipleVectorAddScaleIsEquivalentToRef) { set_up_vector_data(20); @@ -211,7 +210,7 @@ TEST_F(BatchVector, MultipleVectorAddScaleIsEquivalentToRef) } -TEST_F(BatchVector, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) +TEST_F(BatchMultiVector, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) { set_up_vector_data(20, true); @@ -222,7 +221,7 @@ TEST_F(BatchVector, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) } -TEST_F(BatchVector, MultipleVectorAddScaleWithDifferentScalarsIsEquivalentToRef) +TEST_F(BatchMultiVector, MultipleVectorAddScaleWithDifferentScalarsIsEquivalentToRef) { set_up_vector_data(20, true); @@ -233,7 +232,7 @@ TEST_F(BatchVector, MultipleVectorAddScaleWithDifferentScalarsIsEquivalentToRef) } -TEST_F(BatchVector, SingleVectorScaleIsEquivalentToRef) +TEST_F(BatchMultiVector, SingleVectorScaleIsEquivalentToRef) { set_up_vector_data(1); @@ -244,7 +243,7 @@ TEST_F(BatchVector, SingleVectorScaleIsEquivalentToRef) } -TEST_F(BatchVector, MultipleVectorScaleIsEquivalentToRef) +TEST_F(BatchMultiVector, MultipleVectorScaleIsEquivalentToRef) { set_up_vector_data(20); @@ -255,7 +254,7 @@ TEST_F(BatchVector, MultipleVectorScaleIsEquivalentToRef) } -TEST_F(BatchVector, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) +TEST_F(BatchMultiVector, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) { set_up_vector_data(20, true); @@ -266,7 +265,7 @@ TEST_F(BatchVector, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) } -TEST_F(BatchVector, ComputeNorm2SingleIsEquivalentToRef) +TEST_F(BatchMultiVector, ComputeNorm2SingleIsEquivalentToRef) { set_up_vector_data(1); auto norm_size 
= @@ -281,7 +280,7 @@ TEST_F(BatchVector, ComputeNorm2SingleIsEquivalentToRef) } -TEST_F(BatchVector, ComputeNorm2IsEquivalentToRef) +TEST_F(BatchMultiVector, ComputeNorm2IsEquivalentToRef) { set_up_vector_data(20); auto norm_size = @@ -296,7 +295,7 @@ TEST_F(BatchVector, ComputeNorm2IsEquivalentToRef) } -TEST_F(BatchVector, ComputeDotIsEquivalentToRef) +TEST_F(BatchMultiVector, ComputeDotIsEquivalentToRef) { set_up_vector_data(20); auto dot_size = @@ -311,7 +310,7 @@ TEST_F(BatchVector, ComputeDotIsEquivalentToRef) } -TEST_F(BatchVector, ComputeDotSingleIsEquivalentToRef) +TEST_F(BatchMultiVector, ComputeDotSingleIsEquivalentToRef) { set_up_vector_data(1); auto dot_size = @@ -326,31 +325,31 @@ TEST_F(BatchVector, ComputeDotSingleIsEquivalentToRef) } -TEST_F(BatchVector, CopySingleIsEquivalentToRef) +TEST_F(BatchMultiVector, CopySingleIsEquivalentToRef) { set_up_vector_data(1); - gko::kernels::reference::batch_vector::copy(this->ref, x.get(), y.get()); - gko::kernels::EXEC_NAMESPACE::batch_vector::copy(this->exec, dx.get(), - dy.get()); + gko::kernels::reference::batch_multi_vector::copy(this->ref, x.get(), y.get()); + gko::kernels::EXEC_NAMESPACE::batch_multi_vector::copy(this->exec, dx.get(), + dy.get()); GKO_ASSERT_BATCH_MTX_NEAR(dy, y, 0.0); } -TEST_F(BatchVector, CopyIsEquivalentToRef) +TEST_F(BatchMultiVector, CopyIsEquivalentToRef) { set_up_vector_data(20); - gko::kernels::reference::batch_vector::copy(this->ref, x.get(), y.get()); - gko::kernels::EXEC_NAMESPACE::batch_vector::copy(this->exec, dx.get(), - dy.get()); + gko::kernels::reference::batch_multi_vector::copy(this->ref, x.get(), y.get()); + gko::kernels::EXEC_NAMESPACE::batch_multi_vector::copy(this->exec, dx.get(), + dy.get()); GKO_ASSERT_BATCH_MTX_NEAR(dy, y, 0.0); } -TEST_F(BatchVector, BatchScaleIsEquivalentToRef) +TEST_F(BatchMultiVector, BatchScaleIsEquivalentToRef) { using BDiag = gko::matrix::BatchDiagonal; const int num_rhs = 20; @@ -365,16 +364,16 @@ TEST_F(BatchVector, BatchScaleIsEquivalentToRef) auto drght = BDiag::create(this->exec); drght->copy_from(rght.get()); - gko::kernels::reference::batch_vector::batch_scale(this->ref, left.get(), - rght.get(), x.get()); - gko::kernels::EXEC_NAMESPACE::batch_vector::batch_scale( + gko::kernels::reference::batch_multi_vector::batch_scale(this->ref, left.get(), + rght.get(), x.get()); + gko::kernels::EXEC_NAMESPACE::batch_multi_vector::batch_scale( this->exec, dleft.get(), drght.get(), dx.get()); GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); } -TEST_F(BatchVector, TransposeIsEquivalentToRef) +TEST_F(BatchMultiVector, TransposeIsEquivalentToRef) { const int nrows = 11; const int ncols = 6; @@ -392,7 +391,7 @@ TEST_F(BatchVector, TransposeIsEquivalentToRef) } -TEST_F(BatchVector, ConjugateTransposeIsEquivalentToRef) +TEST_F(BatchMultiVector, ConjugateTransposeIsEquivalentToRef) { const int nrows = 11; const int ncols = 6; @@ -410,7 +409,7 @@ TEST_F(BatchVector, ConjugateTransposeIsEquivalentToRef) } -TEST_F(BatchVector, AddScaledIdentityNonSquareIsEquivalentToReference) +TEST_F(BatchMultiVector, AddScaledIdentityNonSquareIsEquivalentToReference) { set_up_apply_data(); const gko::size_type batchsize = 10; From 92c3a7289fbb60b8de06899622eadbee08d57ac8 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 7 Jul 2023 18:39:25 +0200 Subject: [PATCH 108/583] Updates to BatchMultiVector --- include/ginkgo/core/base/batch_dim.hpp | 12 + .../ginkgo/core/base/batch_multi_vector.hpp | 294 ++++++------------ 2 files changed, 108 insertions(+), 198 deletions(-) diff --git 
a/include/ginkgo/core/base/batch_dim.hpp b/include/ginkgo/core/base/batch_dim.hpp index 211225d7df2..3e650745a50 100644 --- a/include/ginkgo/core/base/batch_dim.hpp +++ b/include/ginkgo/core/base/batch_dim.hpp @@ -74,6 +74,18 @@ struct batch_dim { return common_size_; } + /** + * Get the cumulative storage size offset + * + * @param b the batch id + * + * @return the cumulative offset + */ + size_type get_cumulative_offset(size_type b) const + { + return b * common_size_[0] * common_size_[1]; + } + /** * Checks if two batch_dim objects are equal. * diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index a4dafd75faa..a4860e2c7b3 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -62,8 +62,6 @@ namespace gko { * @note While this format is not very useful for storing sparse matrices, it * is often suitable to store vectors, and sets of vectors. * @ingroup batch_multi_vector - * @ingroup mat_formats - * @ingroup BatchLinOp */ template class BatchMultiVector @@ -132,17 +130,41 @@ class BatchMultiVector auto exec = this->get_executor(); auto unbatch_mats = std::vector>{}; for (size_type b = 0; b < this->get_num_batch_entries(); ++b) { - auto mat = unbatch_type::create(exec, this->get_size().at(b), - this->get_stride().at(b)); + auto mat = unbatch_type::create(exec, this->get_common_size(), + this->get_common_size()[1]); exec->copy_from(exec.get(), mat->get_num_stored_elements(), this->get_const_values() + - num_elems_per_batch_cumul_.get_const_data()[b], + this->get_size().get_cumulative_offset(b), mat->get_values()); unbatch_mats.emplace_back(std::move(mat)); } return unbatch_mats; } + /** + * Returns the batch size. + * + * @return the batch size + */ + batch_dim<2> get_size() { return batch_size_; } + + /** + * Returns the number of batch entries. + * + * @return the number of batch entries + */ + size_type get_num_batch_entries() + { + return batch_size_.get_num_batch_entries(); + } + + /** + * Returns the common size of the batch entries. + * + * @return the common size stored + */ + dim<2> get_common_size() { return batch_size_.get_common_size(); } + /** * Returns a pointer to the array of values of the vector. * @@ -158,8 +180,9 @@ class BatchMultiVector value_type* get_values(size_type batch) noexcept { GKO_ASSERT(batch < this->get_num_batch_entries()); + // TODO Verify return values_.get_data() + - num_elems_per_batch_cumul_.get_const_data()[batch]; + this->get_size().get_cumulative_offset(batch); } /** @@ -185,7 +208,7 @@ class BatchMultiVector { GKO_ASSERT(batch < this->get_num_batch_entries()); return values_.get_const_data() + - num_elems_per_batch_cumul_.get_const_data()[batch]; + this->get_size().get_cumulative_offset(batch); } /** @@ -200,21 +223,6 @@ class BatchMultiVector return values_.get_num_elems(); } - /** - * Returns the number of elements explicitly stored at a specific batch - * index. - * - * @param batch the batch index to be queried - * - * @return the number of elements explicitly stored in the vector - */ - size_type get_num_stored_elements(size_type batch) const noexcept - { - GKO_ASSERT(batch < this->get_num_batch_entries()); - return num_elems_per_batch_cumul_.get_const_data()[batch + 1] - - num_elems_per_batch_cumul_.get_const_data()[batch]; - } - /** * Returns a single element for a particular batch. * @@ -226,7 +234,7 @@ class BatchMultiVector * stored at (e.g. 
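As a rough, standalone illustration (not part of the patch): the new get_cumulative_offset assumes every batch entry shares the same dense, row-major layout, so entry b starts at offset b * rows * cols in the flat value array. A minimal sketch of that arithmetic, using hypothetical sizes:

// Sketch only: a uniform batch of 4 entries, each 3x2, stored contiguously
// in row-major order with no padding between entries.
#include <cassert>
#include <cstddef>

int main()
{
    const std::size_t num_batch_entries = 4;
    const std::size_t rows = 3;
    const std::size_t cols = 2;

    // Mirrors the arithmetic of batch_dim<2>::get_cumulative_offset(b)
    // from the hunk above.
    auto cumulative_offset = [&](std::size_t b) { return b * rows * cols; };

    assert(cumulative_offset(0) == 0);   // first entry starts at the front
    assert(cumulative_offset(2) == 12);  // two full 3x2 entries precede it
    // The offset one past the last entry equals the total storage size.
    assert(cumulative_offset(num_batch_entries) == 24);
    return 0;
}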
trying to call this method on a GPU matrix from * the OMP results in a runtime error) */ - value_type& at(size_type batch, size_type row, size_type col) noexcept + value_type& at(size_type batch, size_type row, size_type col) { GKO_ASSERT(batch < this->get_num_batch_entries()); return values_.get_data()[linearize_index(batch, row, col)]; @@ -235,7 +243,7 @@ class BatchMultiVector /** * @copydoc BatchMultiVector::at(size_type, size_type, size_type) */ - value_type at(size_type batch, size_type row, size_type col) const noexcept + value_type at(size_type batch, size_type row, size_type col) const { GKO_ASSERT(batch < this->get_num_batch_entries()); return values_.get_const_data()[linearize_index(batch, row, col)]; @@ -278,7 +286,7 @@ class BatchMultiVector * of alpha (the number of columns of alpha has to match the number of * columns of the matrix). */ - void scale(const BatchLinOp* alpha) + void scale(const BatchMultiVector* alpha) { auto exec = this->get_executor(); this->scale_impl(make_temporary_clone(exec, alpha).get()); @@ -294,7 +302,7 @@ class BatchMultiVector * vector). * @param b a matrix of the same dimension as this */ - void add_scaled(const BatchLinOp* alpha, const BatchLinOp* b) + void add_scaled(const BatchMultiVector* alpha, const BatchMultiVector* b) { auto exec = this->get_executor(); this->add_scaled_impl(make_temporary_clone(exec, alpha).get(), @@ -313,8 +321,8 @@ class BatchMultiVector * @param beta Scalar(s), of the same size as alpha, to multiply this * matrix. */ - void add_scale(const BatchLinOp* alpha, const BatchLinOp* a, - const BatchLinOp* beta); + void add_scale(const BatchMultiVector* alpha, const BatchMultiVector* a, + const BatchMultiVector* beta); /** * Computes the column-wise dot product of each matrix in this batch and its @@ -326,7 +334,7 @@ class BatchMultiVector * product (the number of column in the vector must match the number of * columns of this) */ - void compute_dot(const BatchLinOp* b, BatchLinOp* result) const + void compute_dot(const BatchMultiVector* b, BatchMultiVector* result) const { auto exec = this->get_executor(); this->compute_dot_impl(make_temporary_clone(exec, b).get(), @@ -340,7 +348,7 @@ class BatchMultiVector * (the number of columns in the vector must match the number * of columns of this) */ - void compute_norm2(BatchLinOp* result) const + void compute_norm2(BatchMultiVector* result) const { auto exec = this->get_executor(); this->compute_norm2_impl(make_temporary_clone(exec, result).get()); @@ -359,95 +367,28 @@ class BatchMultiVector */ static std::unique_ptr create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - gko::detail::const_array_view&& values, - const batch_stride& strides) + gko::detail::const_array_view&& values) { // cast const-ness away, but return a const object afterwards, // so we can ensure that no modifications take place. return std::unique_ptr(new BatchMultiVector{ - exec, sizes, gko::detail::array_const_cast(std::move(values)), - strides}); + exec, sizes, gko::detail::array_const_cast(std::move(values))}); } private: - /** - * Compute the memory required for the values array from the sizes and the - * strides. 
- */ - inline size_type compute_batch_mem(const batch_dim<2>& sizes, - const batch_stride& strides) - { - GKO_ASSERT(sizes.get_num_batch_entries() == - strides.get_num_batch_entries()); - if (sizes.stores_equal_sizes() && strides.stores_equal_strides()) { - return (sizes.at(0))[0] * strides.at(0) * - sizes.get_num_batch_entries(); - } - size_type mem_req = 0; - for (auto i = 0; i < sizes.get_num_batch_entries(); ++i) { - mem_req += (sizes.at(i))[0] * strides.at(i); - } - return mem_req; - } - - /** - * Extract the nth dim of the batch sizes from the input batch_dim object. - */ - inline batch_stride extract_nth_dim(const int dim, const batch_dim<2>& size) - { - if (size.stores_equal_sizes()) { - return batch_stride(size.get_num_batch_entries(), size.at(0)[dim]); - } - std::vector stride(size.get_num_batch_entries()); - for (auto i = 0; i < size.get_num_batch_entries(); ++i) { - stride[i] = (size.at(i))[dim]; - } - return batch_stride(stride); - } - - /** - * Extract strides from the vector of the distinct Dense matrices. - */ - inline batch_stride get_strides_from_mtxs( - const std::vector*> mtxs) + inline batch_dim<2> compute_batch_size( + const std::vector*>& matrices) { - auto strides = std::vector(mtxs.size()); - for (auto i = 0; i < mtxs.size(); ++i) { - strides[i] = mtxs[i]->get_stride(); + auto common_size = matrices[0]->get_size(); + for (int i = 1; i < matrices.size(); ++i) { + GKO_ASSERT_EQ(common_size, matrices[i]->get_size()); } - return batch_stride(strides); + return batch_dim<2>{num_entries, common_size}; } - /** - * Extract sizes from the vector of the distinct Dense matrices. - */ - inline batch_dim<2> get_sizes_from_mtxs( - const std::vector*> mtxs) + inline size_type compute_num_elems(const batch_dim<2>& size) { - auto sizes = std::vector>(mtxs.size()); - for (auto i = 0; i < mtxs.size(); ++i) { - sizes[i] = mtxs[i]->get_size(); - } - return batch_dim<2>(sizes); - } - - /** - * Compute the number of elements stored in each batch and store it in a - * prefixed sum fashion - */ - inline array compute_num_elems_per_batch_cumul( - std::shared_ptr exec, const batch_dim<2>& sizes, - const batch_stride& strides) - { - auto num_elems = array(exec->get_master(), - sizes.get_num_batch_entries() + 1); - num_elems.get_data()[0] = 0; - for (auto i = 0; i < sizes.get_num_batch_entries(); ++i) { - num_elems.get_data()[i + 1] = - num_elems.get_data()[i] + (sizes.at(i))[0] * strides.at(i); - } - num_elems.set_executor(exec); - return num_elems; + return size.get_cumulative_offset(size.get_num_batch_entries()); } protected: @@ -459,31 +400,11 @@ class BatchMultiVector */ BatchMultiVector(std::shared_ptr exec, const batch_dim<2>& size = batch_dim<2>{}) - : BatchMultiVector(std::move(exec), size, - size.get_num_batch_entries() > 0 - ? extract_nth_dim(1, size) - : batch_stride{}) + : batch_size_(size), + values_(exec, compute_num_elems(size)), + exec(std::move(exec)) {} - /** - * Creates an uninitialized BatchMultiVector matrix of the specified size. - * - * @param exec Executor associated to the vector - * @param size size of the batch matrices in a batch_dim object - * @param stride stride of the rows (i.e. 
offset between the first - * elements of two consecutive rows, expressed as the - * number of matrix elements) - */ - BatchMultiVector(std::shared_ptr exec, - const batch_dim<2>& size, const batch_stride& stride) - : EnableBatchLinOp(exec, size), - values_(exec, compute_batch_mem(size, stride)), - stride_(stride) - { - num_elems_per_batch_cumul_ = - compute_num_elems_per_batch_cumul(exec, this->get_size(), stride); - } - /** * Creates a BatchMultiVector matrix from an already allocated (and * initialized) array. @@ -503,21 +424,13 @@ class BatchMultiVector */ template BatchMultiVector(std::shared_ptr exec, - const batch_dim<2>& size, ValuesArray&& values, - const batch_stride& stride) - : EnableBatchLinOp(exec, size), + const batch_dim<2>& size, ValuesArray&& values) + : batch_size_(size), values_{exec, std::forward(values)}, - stride_{stride}, - num_elems_per_batch_cumul_( - exec->get_master(), - compute_num_elems_per_batch_cumul(exec->get_master(), - this->get_size(), stride)) + exec_(std::move(exec)) { - auto num_elems = - num_elems_per_batch_cumul_ - .get_const_data()[num_elems_per_batch_cumul_.get_num_elems() - - 1] - - 1; + // Ensure that the values array has the correct size + auto num_elems = compute_num_elems(size); GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems()); } @@ -529,20 +442,16 @@ class BatchMultiVector */ BatchMultiVector(std::shared_ptr exec, const std::vector*>& matrices) - : EnableBatchLinOp(exec, - get_sizes_from_mtxs(matrices)), - stride_{get_strides_from_mtxs(matrices)}, - values_(exec, compute_batch_mem(this->get_size(), stride_)) + : batch_size_{compute_batch_size(matrices)}, + values(exec, compute_num_elems(batch_size_)), + exec(std::move(exec)) { - num_elems_per_batch_cumul_ = compute_num_elems_per_batch_cumul( - exec->get_master(), this->get_size(), stride_); for (size_type i = 0; i < this->get_num_batch_entries(); ++i) { auto local_exec = matrices[i]->get_executor(); - exec->copy_from(local_exec.get(), - matrices[i]->get_num_stored_elements(), - matrices[i]->get_const_values(), - this->get_values() + - num_elems_per_batch_cumul_.get_const_data()[i]); + exec->copy_from( + local_exec.get(), matrices[i]->get_num_stored_elements(), + matrices[i]->get_const_values(), + this->get_values() + this->get_size().get_cumulative_offset(i)); } } @@ -556,18 +465,11 @@ class BatchMultiVector BatchMultiVector(std::shared_ptr exec, size_type num_duplications, const BatchMultiVector* input) - : EnableBatchLinOp( + : EnableBatchMultiVector( exec, gko::batch_dim<2>( input->get_num_batch_entries() * num_duplications, - input->get_size().at(0))), - stride_{gko::batch_stride( - input->get_num_batch_entries() * num_duplications, - input->get_stride().at(0))}, - values_(exec, compute_batch_mem(this->get_size(), stride_)) + input->get_common_size())) { - // Check if it works when stride neq num_cols - num_elems_per_batch_cumul_ = compute_num_elems_per_batch_cumul( - exec->get_master(), this->get_size(), stride_); size_type offset = 0; for (size_type i = 0; i < num_duplications; ++i) { exec->copy_from( @@ -586,14 +488,9 @@ class BatchMultiVector */ BatchMultiVector(std::shared_ptr exec, size_type num_duplications, const Dense* input) - : EnableBatchLinOp( - exec, gko::batch_dim<2>(num_duplications, input->get_size())), - stride_{gko::batch_stride(num_duplications, input->get_stride())}, - values_(exec, compute_batch_mem(this->get_size(), stride_)) + : EnableBatchMultiVector( + exec, gko::batch_dim<2>(num_duplications, input->get_size())) { - // Check if it works when stride neq 
num_cols - num_elems_per_batch_cumul_ = compute_num_elems_per_batch_cumul( - exec->get_master(), this->get_size(), stride_); size_type offset = 0; for (size_type i = 0; i < num_duplications; ++i) { exec->copy_from( @@ -612,61 +509,62 @@ class BatchMultiVector */ virtual std::unique_ptr create_with_same_config() const { - return BatchMultiVector::create(this->get_executor(), this->get_size(), - this->get_stride()); + return BatchMultiVector::create(this->get_executor(), this->get_size()); } /** - * @copydoc scale(const BatchLinOp *) + * @copydoc scale(const BatchMultiVector *) * * @note Other implementations of batch_multi_vector should override this - * function instead of scale(const BatchLinOp *alpha). + * function instead of scale(const BatchMultiVector *alpha). */ - virtual void scale_impl(const BatchLinOp* alpha); + virtual void scale_impl(const BatchMultiVector* alpha); /** - * @copydoc add_scaled(const BatchLinOp *, const BatchLinOp *) + * @copydoc add_scaled(const BatchMultiVector *, const BatchMultiVector *) * * @note Other implementations of batch_multi_vector should override this - * function instead of add_scale(const BatchLinOp *alpha, const BatchLinOp - * *b). + * function instead of add_scale(const BatchMultiVector *alpha, const + * BatchMultiVector *b). */ - virtual void add_scaled_impl(const BatchLinOp* alpha, const BatchLinOp* b); + virtual void add_scaled_impl(const BatchMultiVector* alpha, + const BatchMultiVector* b); /** - * @copydoc compute_dot(const BatchLinOp *, BatchLinOp *) const + * @copydoc compute_dot(const BatchMultiVector *, BatchMultiVector *) const * * @note Other implementations of batch_multi_vector should override this - * function instead of compute_dot(const BatchLinOp *b, BatchLinOp *result). + * function instead of compute_dot(const BatchMultiVector *b, + * BatchMultiVector *result). */ - virtual void compute_dot_impl(const BatchLinOp* b, - BatchLinOp* result) const; + virtual void compute_dot_impl(const BatchMultiVector* b, + BatchMultiVector* result) const; /** - * @copydoc compute_norm2(BatchLinOp *) const + * @copydoc compute_norm2(BatchMultiVector *) const * * @note Other implementations of batch_multi_vector should override this - * function instead of compute_norm2(BatchLinOp *result). + * function instead of compute_norm2(BatchMultiVector *result). 
*/ - virtual void compute_norm2_impl(BatchLinOp* result) const; + virtual void compute_norm2_impl(BatchMultiVector* result) const; size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept { - return num_elems_per_batch_cumul_.get_const_data()[batch] + - row * stride_.at(batch) + col; + return batch_size_.get_cumulative_offset(batch) + + row * batch_size_.get_common_size()[1] + col; } size_type linearize_index(size_type batch, size_type idx) const noexcept { - return linearize_index(batch, idx / this->get_size().at(batch)[1], - idx % this->get_size().at(batch)[1]); + return linearize_index(batch, idx / this->get_common_size()[1], + idx % this->get_common_size()[1]); } private: - batch_stride stride_; - array num_elems_per_batch_cumul_; + batch_dim<2> batch_size_; array values_; + std::shared_ptr exec; }; @@ -688,7 +586,7 @@ class BatchMultiVector * including the Executor, which is passed as the first * argument * - * @ingroup BatchLinOp + * @ingroup BatchMultiVector * @ingroup mat_formats */ template @@ -743,7 +641,7 @@ std::unique_ptr batch_initialize( * including the Executor, which is passed as the first * argument * - * @ingroup BatchLinOp + * @ingroup BatchMultiVector * @ingroup mat_formats */ template @@ -776,7 +674,7 @@ std::unique_ptr batch_initialize( * including the Executor, which is passed as the first * argument * - * @ingroup BatchLinOp + * @ingroup BatchMultiVector * @ingroup mat_formats */ template @@ -840,7 +738,7 @@ std::unique_ptr batch_initialize( * including the Executor, which is passed as the first * argument * - * @ingroup BatchLinOp + * @ingroup BatchMultiVector * @ingroup mat_formats */ template @@ -883,7 +781,7 @@ std::unique_ptr batch_initialize( * including the Executor, which is passed as the first * argument * - * @ingroup BatchLinOp + * @ingroup BatchMultiVector * @ingroup mat_formats */ template @@ -936,7 +834,7 @@ std::unique_ptr batch_initialize( * including the Executor, which is passed as the first * argument * - * @ingroup BatchLinOp + * @ingroup BatchMultiVector * @ingroup mat_formats */ template From 3979e1bdab4b8c0ca7c22f26025337e957a2c1eb Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 9 Jul 2023 00:38:11 +0200 Subject: [PATCH 109/583] Use PolymorphicObject, fix batch_initialize --- core/base/batch_multi_vector.cpp | 73 ++--- .../ginkgo/core/base/batch_multi_vector.hpp | 310 ++++++------------ 2 files changed, 127 insertions(+), 256 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 76639494088..cc83638ee92 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -64,78 +64,67 @@ GKO_REGISTER_OPERATION(copy, batch_multi_vector::copy); template -void BatchMultiVector::scale_impl(const BatchLinOp* alpha) +void BatchMultiVector::scale_impl( + const BatchMultiVector* alpha) { - auto batch_alpha = as>(alpha); GKO_ASSERT_BATCH_EQUAL_ROWS( - batch_alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); - for (size_type b = 0; b < batch_alpha->get_num_batch_entries(); ++b) { - if (batch_alpha->get_size().at(b)[1] != 1) { + alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); + for (size_type b = 0; b < alpha->get_num_batch_entries(); ++b) { + if (alpha->get_common_size()[1] != 1) { // different alpha for each column - GKO_ASSERT_BATCH_EQUAL_COLS(this, batch_alpha); + GKO_ASSERT_BATCH_EQUAL_COLS(this, alpha); } } - auto exec = this->get_executor(); - exec->run(batch_multi_vector::make_scale(batch_alpha, this)); + 
this->get_executor()->run(batch_multi_vector::make_scale(alpha, this)); } template -void BatchMultiVector::add_scaled_impl(const BatchLinOp* alpha, - const BatchLinOp* b) +void BatchMultiVector::add_scaled_impl( + const BatchMultiVector* alpha, + const BatchMultiVector* b) { - auto batch_alpha = as>(alpha); - auto batch_b = as>(b); GKO_ASSERT_BATCH_EQUAL_ROWS( - batch_alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); - for (size_type b = 0; b < batch_alpha->get_num_batch_entries(); ++b) { - if (batch_alpha->get_size().at(b)[1] != 1) { + alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); + for (size_type b = 0; b < alpha->get_num_batch_entries(); ++b) { + if (alpha->get_common_size()[1] != 1) { // different alpha for each column - GKO_ASSERT_BATCH_EQUAL_COLS(this, batch_alpha); + GKO_ASSERT_BATCH_EQUAL_COLS(this, alpha); } } - GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, batch_b); - auto exec = this->get_executor(); + GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, b); - exec->run(batch_multi_vector::make_add_scaled(batch_alpha, batch_b, this)); + this->get_executor()->run( + batch_multi_vector::make_add_scaled(alpha, b, this)); } inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) { - auto col_sizes = std::vector>(sizes.get_num_batch_entries()); - for (size_type i = 0; i < col_sizes.size(); ++i) { - col_sizes[i] = dim<2>(1, sizes.at(i)[1]); - } - return batch_dim<2>(col_sizes); + return batch_dim<2>(sizes.get_num_batch_entries(), dim<2>(1, sizes[1])); } template -void BatchMultiVector::compute_dot_impl(const BatchLinOp* b, - BatchLinOp* result) const +void BatchMultiVector::compute_dot_impl( + const BatchMultiVector* b, + BatchMultiVector* result) const { - auto batch_result = as>(result); - auto batch_b = as>(b); - GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, batch_b); + GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, b); GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_result, get_col_sizes(this->get_size())); - auto exec = this->get_executor(); - exec->run( - batch_multi_vector::make_compute_dot(this, batch_b, batch_result)); + this->get_executor()->run( + batch_multi_vector::make_compute_dot(this, b, result)); } template -void BatchMultiVector::compute_norm2_impl(BatchLinOp* result) const +void BatchMultiVector::compute_norm2_impl( + BatchMultiVector>* result) const { - using NormVector = BatchMultiVector>; - auto batch_result = as(result); - GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_result, - get_col_sizes(this->get_size())); - auto exec = this->get_executor(); - exec->run(batch_multi_vector::make_compute_norm2( - as>(this), batch_result)); + GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(result, get_col_sizes(this->get_size())); + this->get_executor()->run(batch_multi_vector::make_compute_norm2( + as>(this), result)); } @@ -144,8 +133,6 @@ void BatchMultiVector::convert_to( BatchMultiVector>* result) const { result->values_ = this->values_; - result->stride_ = this->stride_; - result->num_elems_per_batch_cumul_ = this->num_elems_per_batch_cumul_; result->set_size(this->get_size()); } @@ -206,7 +193,7 @@ void BatchMultiVector::read(const std::vector& data) template inline void write_impl(const MatrixType* mtx, std::vector& data) { - std::unique_ptr op{}; + std::unique_ptr> op{}; const MatrixType* tmp{}; if (mtx->get_executor()->get_master() != mtx->get_executor()) { op = mtx->clone(mtx->get_executor()->get_master()); diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index a4860e2c7b3..9513272648d 100644 --- 
a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -84,8 +84,8 @@ class BatchMultiVector using unbatch_type = Dense; using mat_data = gko::matrix_data; using mat_data32 = gko::matrix_data; - using absolute_type = remove_complex; - using complex_type = to_complex; + using absolute_type = remove_complex>; + using complex_type = to_complex>; using row_major_range = gko::range>; @@ -286,7 +286,7 @@ class BatchMultiVector * of alpha (the number of columns of alpha has to match the number of * columns of the matrix). */ - void scale(const BatchMultiVector* alpha) + void scale(const BatchMultiVector* alpha) { auto exec = this->get_executor(); this->scale_impl(make_temporary_clone(exec, alpha).get()); @@ -302,7 +302,8 @@ class BatchMultiVector * vector). * @param b a matrix of the same dimension as this */ - void add_scaled(const BatchMultiVector* alpha, const BatchMultiVector* b) + void add_scaled(const BatchMultiVector* alpha, + const BatchMultiVector* b) { auto exec = this->get_executor(); this->add_scaled_impl(make_temporary_clone(exec, alpha).get(), @@ -321,8 +322,9 @@ class BatchMultiVector * @param beta Scalar(s), of the same size as alpha, to multiply this * matrix. */ - void add_scale(const BatchMultiVector* alpha, const BatchMultiVector* a, - const BatchMultiVector* beta); + void add_scale(const BatchMultiVector* alpha, + const BatchMultiVector* a, + const BatchMultiVector* beta); /** * Computes the column-wise dot product of each matrix in this batch and its @@ -334,7 +336,8 @@ class BatchMultiVector * product (the number of column in the vector must match the number of * columns of this) */ - void compute_dot(const BatchMultiVector* b, BatchMultiVector* result) const + void compute_dot(const BatchMultiVector* b, + BatchMultiVector* result) const { auto exec = this->get_executor(); this->compute_dot_impl(make_temporary_clone(exec, b).get(), @@ -348,7 +351,7 @@ class BatchMultiVector * (the number of columns in the vector must match the number * of columns of this) */ - void compute_norm2(BatchMultiVector* result) const + void compute_norm2(BatchMultiVector* result) const { auto exec = this->get_executor(); this->compute_norm2_impl(make_temporary_clone(exec, result).get()); @@ -365,7 +368,7 @@ class BatchMultiVector * (if it resides on the same executor as the vector) or a copy of * the array on the correct executor. */ - static std::unique_ptr create_const( + static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values) { @@ -375,6 +378,43 @@ class BatchMultiVector exec, sizes, gko::detail::array_const_cast(std::move(values))}); } + /** + * Copy-assigns a BatchMultiVector. Preserves the executor and copies the + * size. + */ + BatchMultiVector& operator=(const BatchMultiVector&) = default; + + /** + * Move-assigns a BatchMultiVector. Preserves the executor and moves the + * size. The moved-from object has size 0x0 afterwards, but its executor is + * unchanged. + */ + BatchMultiVector& operator=(BatchMultiVector&& other) + { + if (this != &other) { + EnableAbstractPolymorphicObject::operator=( + std::move(other)); + this->set_size(other.get_size()); + other.set_size({}); + } + return *this; + } + + /** + * Copy-constructs a BatchMultiVector. Inherits executor and size from the + * input. + */ + BatchMultiVector(const BatchMultiVector&) = default; + + /** + * Move-constructs a BatchMultiVector. 
Inherits executor and size from the + * input, which will have size 0x0 and unchanged executor afterwards. + */ + BatchMultiVector(BatchMultiVector&& other) + : EnableAbstractPolymorphicObject(std::move(other)), + batch_size_{std::exchange(other.batch_size_, batch_dim<2>{})} + {} + private: inline batch_dim<2> compute_batch_size( const std::vector*>& matrices) @@ -392,6 +432,13 @@ class BatchMultiVector } protected: + /** + * Sets the size of the BatchMultiVector. + * + * @param value the new size of the operator + */ + void set_size(const batch_dim<2>& value) noexcept { batch_size_ = value; } + /** * Creates an uninitialized BatchMultiVector matrix of the specified size. * @@ -400,9 +447,9 @@ class BatchMultiVector */ BatchMultiVector(std::shared_ptr exec, const batch_dim<2>& size = batch_dim<2>{}) - : batch_size_(size), - values_(exec, compute_num_elems(size)), - exec(std::move(exec)) + : EnableAbstractPolymorphicObject(exec), + batch_size_(size), + values_(exec, compute_num_elems(size)) {} /** @@ -425,9 +472,9 @@ class BatchMultiVector template BatchMultiVector(std::shared_ptr exec, const batch_dim<2>& size, ValuesArray&& values) - : batch_size_(size), - values_{exec, std::forward(values)}, - exec_(std::move(exec)) + : EnableAbstractPolymorphicObject(exec), + batch_size_(size), + values_{exec, std::forward(values)} { // Ensure that the values array has the correct size auto num_elems = compute_num_elems(size); @@ -442,9 +489,9 @@ class BatchMultiVector */ BatchMultiVector(std::shared_ptr exec, const std::vector*>& matrices) - : batch_size_{compute_batch_size(matrices)}, - values(exec, compute_num_elems(batch_size_)), - exec(std::move(exec)) + : EnableAbstractPolymorphicObject(exec), + batch_size_{compute_batch_size(matrices)}, + values(exec, compute_num_elems(batch_size_)) { for (size_type i = 0; i < this->get_num_batch_entries(); ++i) { auto local_exec = matrices[i]->get_executor(); @@ -518,7 +565,7 @@ class BatchMultiVector * @note Other implementations of batch_multi_vector should override this * function instead of scale(const BatchMultiVector *alpha). */ - virtual void scale_impl(const BatchMultiVector* alpha); + virtual void scale_impl(const BatchMultiVector* alpha); /** * @copydoc add_scaled(const BatchMultiVector *, const BatchMultiVector *) @@ -527,8 +574,8 @@ class BatchMultiVector * function instead of add_scale(const BatchMultiVector *alpha, const * BatchMultiVector *b). */ - virtual void add_scaled_impl(const BatchMultiVector* alpha, - const BatchMultiVector* b); + virtual void add_scaled_impl(const BatchMultiVector* alpha, + const BatchMultiVector* b); /** * @copydoc compute_dot(const BatchMultiVector *, BatchMultiVector *) const @@ -537,8 +584,8 @@ class BatchMultiVector * function instead of compute_dot(const BatchMultiVector *b, * BatchMultiVector *result). */ - virtual void compute_dot_impl(const BatchMultiVector* b, - BatchMultiVector* result) const; + virtual void compute_dot_impl(const BatchMultiVector* b, + BatchMultiVector* result) const; /** * @copydoc compute_norm2(BatchMultiVector *) const @@ -546,7 +593,7 @@ class BatchMultiVector * @note Other implementations of batch_multi_vector should override this * function instead of compute_norm2(BatchMultiVector *result). 
*/ - virtual void compute_norm2_impl(BatchMultiVector* result) const; + virtual void compute_norm2_impl(BatchMultiVector* result) const; size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept @@ -564,7 +611,6 @@ class BatchMultiVector private: batch_dim<2> batch_size_; array values_; - std::shared_ptr exec; }; @@ -579,7 +625,6 @@ class BatchMultiVector * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param stride row stride for the temporary Dense matrix * @param vals values used to initialize the batch vector * @param exec Executor associated to the vector * @param create_args additional arguments passed to Matrix::create, not @@ -591,24 +636,21 @@ class BatchMultiVector */ template std::unique_ptr batch_initialize( - std::vector stride, std::initializer_list> vals, std::shared_ptr exec, TArgs&&... create_args) { using batch_multi_vector = BatchMultiVector; - size_type num_batch_entries = vals.size(); - std::vector num_rows(num_batch_entries); - std::vector> sizes(num_batch_entries); + size_type common_num_rows = vals_begin->size(); + size_type common_size = dim<2>(common_num_rows, 1); + dim<2> common_size; auto vals_begin = begin(vals); for (size_type b = 0; b < num_batch_entries; ++b) { - num_rows[b] = vals_begin->size(); - sizes[b] = dim<2>(num_rows[b], 1); + GKO_ASSERT_EQ(common_num_rows, vals_begin->size()); vals_begin++; } - auto b_size = batch_dim<2>(sizes); - auto b_stride = batch_stride(stride); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size, b_stride); + auto b_size = batch_dim<2>(num_batch_entries, common_size); + auto tmp = batch_multi_vector::create(exec->get_master(), b_size); size_type batch = 0; for (const auto& b : vals) { size_type idx = 0; @@ -623,38 +665,6 @@ std::unique_ptr batch_initialize( return mtx; } -/** - * Creates and initializes a batch of column-vectors. - * - * This function first creates a temporary Dense matrix, fills it with passed in - * values, and then converts the vector to the requested type. The stride of - * the intermediate Dense matrix is set to 1. - * - * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param vals values used to initialize the vector - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup BatchMultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr batch_initialize( - std::initializer_list> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - return batch_initialize(std::vector(vals.size(), 1), - vals, std::move(exec), - std::forward(create_args)...); -} - /** * Creates and initializes a batch of matrices. 
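A minimal usage sketch for the reworked, stride-free column-vector overload above (illustration only; the executor, the value type double, and the variable names are assumptions, not part of the patch):

    auto exec = gko::ReferenceExecutor::create();
    // two batch entries, each a 3x1 column vector with a common size
    auto vec = gko::batch_initialize<gko::BatchMultiVector<double>>(
        {{1.0, 2.0, 3.0}, {-1.0, 0.5, 4.0}}, exec);

Since every entry must now share a common size, initializer rows of differing lengths are expected to trip the GKO_ASSERT_EQ check in the implementation.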
@@ -667,7 +677,6 @@ std::unique_ptr batch_initialize( * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param stride row stride for the temporary Dense matrix * @param vals values used to initialize the vector * @param exec Executor associated to the vector * @param create_args additional arguments passed to Matrix::create, not @@ -679,7 +688,6 @@ std::unique_ptr batch_initialize( */ template std::unique_ptr batch_initialize( - std::vector stride, std::initializer_list>> vals, @@ -687,19 +695,20 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = vals.size(); - std::vector num_rows(num_batch_entries); - std::vector num_cols(num_batch_entries); - std::vector> sizes(num_batch_entries); + + auto vals_begin = begin(vals); + size_type common_num_rows = vals_begin->size(); + size_type common_num_cols = begin(vals_begin)->size(); + auto common_size = dim<2>(common_num_rows, common_num_cols); size_type ind = 0; for (const auto& b : vals) { - num_rows[ind] = b.size(); - num_cols[ind] = num_rows[ind] > 0 ? begin(b)->size() : 1; - sizes[ind] = dim<2>(num_rows[ind], num_cols[ind]); - ++ind; - } - auto b_size = batch_dim<2>(sizes); - auto b_stride = batch_stride(stride); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size, b_stride); + auto num_rows = b.size(); + auto num_cols = begin(b)->size(); + auto b_size = dim<2>(num_rows, num_cols); + GKO_ASSERT_EQ(b_size, common_size); + } + auto b_size = batch_dim<2>(num_batch_entries, common_size); + auto tmp = batch_multi_vector::create(exec->get_master(), b_size); size_type batch = 0; for (const auto& b : vals) { size_type ridx = 0; @@ -719,46 +728,6 @@ std::unique_ptr batch_initialize( } -/** - * Creates and initializes a batch of matrices. - * - * This function first creates a temporary Dense matrix, fills it with passed in - * values, and then converts the vector to the requested type. The stride of - * the intermediate Dense matrix is set to the number of columns of the - * initializer list. - * - * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param vals values used to initialize the vector - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup BatchMultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr batch_initialize( - std::initializer_list>> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - auto strides = std::vector(vals.size(), 0); - size_type ind = 0; - for (const auto& b : vals) { - strides[ind] = begin(b)->size(); - ++ind; - } - return batch_initialize(strides, vals, std::move(exec), - std::forward(create_args)...); -} - - /** * Creates and initializes a batch column-vector by making copies of the single * input column vector. @@ -772,7 +741,6 @@ std::unique_ptr batch_initialize( * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param stride row strides for the temporary batch dense matrix * @param num_vectors The number of times the input vector is copied into * the final output * @param vals values used to initialize each vector in the temp. 
batch @@ -786,20 +754,14 @@ std::unique_ptr batch_initialize( */ template std::unique_ptr batch_initialize( - std::vector stride, const size_type num_vectors, + const size_type num_vectors, std::initializer_list vals, std::shared_ptr exec, TArgs&&... create_args) { using batch_multi_vector = BatchMultiVector; - std::vector num_rows(num_vectors); - std::vector> sizes(num_vectors); - for (size_type b = 0; b < num_vectors; ++b) { - num_rows[b] = vals.size(); - sizes[b] = dim<2>(vals.size(), 1); - } - auto b_size = batch_dim<2>(sizes); - auto b_stride = batch_stride(stride); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size, b_stride); + size_type num_batch_entries = num_vectors; + auto b_size = batch_dim<2>(num_batch_entries, dim<2>(vals.size(), 1)); + auto tmp = batch_multi_vector::create(exec->get_master(), b_size); for (size_type batch = 0; batch < num_vectors; batch++) { size_type idx = 0; for (const auto& elem : vals) { @@ -813,41 +775,6 @@ std::unique_ptr batch_initialize( } -/** - * Creates and initializes a column-vector from copies of a given vector. - * - * This function first creates a temporary Dense matrix, fills it with passed - * in values, and then converts the vector to the requested type. The stride of - * the intermediate Dense matrix is set to 1. - * - * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo - * interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param num_vectors The number of times the input vector is copied into - * the final output - * @param vals values used to initialize the vector - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup BatchMultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr batch_initialize( - const size_type num_vectors, - std::initializer_list vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - return batch_initialize(std::vector(num_vectors, 1), - num_vectors, vals, std::move(exec), - std::forward(create_args)...); -} - /** * Creates and initializes a matrix from copies of a given matrix. * @@ -873,22 +800,15 @@ std::unique_ptr batch_initialize( */ template std::unique_ptr batch_initialize( - std::vector stride, const size_type num_matrices, + const size_type num_matrices, std::initializer_list> vals, std::shared_ptr exec, TArgs&&... create_args) { using batch_multi_vector = BatchMultiVector; - std::vector> sizes(num_matrices); - const size_type num_rows = vals.size(); - for (size_type b = 0; b < num_matrices; ++b) { - const size_type num_cols = begin(vals)->size(); - sizes[b] = dim<2>(num_rows, num_cols); - for (auto blockit = begin(vals); blockit != end(vals); ++blockit) { - GKO_ASSERT(blockit->size() == num_cols); - } - } - auto tmp = batch_multi_vector::create(exec->get_master(), sizes, stride); + auto common_size = dim<2>(vals.size(), begin(vals)->size()); + batch_dim<2> b_size(num_matrices, common_size); + auto tmp = batch_multi_vector::create(exec->get_master(), b_size); for (size_type batch = 0; batch < num_matrices; batch++) { size_type ridx = 0; for (const auto& row : vals) { @@ -905,42 +825,6 @@ std::unique_ptr batch_initialize( return mtx; } -/** - * Creates and initializes a matrix from copies of a given matrix. 
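A corresponding sketch for the duplication overload above, which likewise drops its stride argument (illustration only; exec and the value type double are assumptions):

    // three identical 2x2 entries created from one initializer list
    auto dup = gko::batch_initialize<gko::BatchMultiVector<double>>(
        3, {{1.0, 2.0}, {3.0, 4.0}}, exec);
    // dup->get_common_size() is dim<2>(2, 2)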
- * - * This function first creates a temporary Dense matrix, fills it with passed in - * values, and then converts the vector to the requested type. The stride of - * the intermediate Dense matrix is set to 1. - * - * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param num_vectors The number of times the input vector is copied into - * the final output - * @param vals values used to initialize the vector - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup LinOp - * @ingroup mat_formats - */ -template -std::unique_ptr batch_initialize( - const size_type num_matrices, - std::initializer_list> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - auto strides = std::vector(num_matrices, begin(vals)->size()); - return batch_initialize(strides, num_matrices, vals, - std::move(exec), - std::forward(create_args)...); -} - } // namespace gko From 7bfaf49339f334d034738976822cd0ba278c2939 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 9 Jul 2023 00:42:33 +0200 Subject: [PATCH 110/583] Add read and write impls --- core/base/batch_multi_vector.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index cc83638ee92..3a3f0aff757 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -148,14 +148,15 @@ void BatchMultiVector::move_to( template inline void read_impl(MatrixType* mtx, const std::vector& data) { - auto batch_sizes = std::vector>(data.size()); + auto common_size = data[0].size; + auto batch_size = batch_dim<2>(data.size(), common_size); size_type ind = 0; for (const auto& b : data) { - batch_sizes[ind] = b.size; - ++ind; + b_size = b.size; + GKO_ASSERT_EQ(common_size, b_size); } - auto tmp = MatrixType::create(mtx->get_executor()->get_master(), - batch_dim<2>(batch_sizes)); + auto tmp = + MatrixType::create(mtx->get_executor()->get_master(), batch_size); for (size_type b = 0; b < data.size(); ++b) { size_type ind = 0; for (size_type row = 0; row < data[b].size[0]; ++row) { @@ -204,7 +205,7 @@ inline void write_impl(const MatrixType* mtx, std::vector& data) data = std::vector(mtx->get_num_batch_entries()); for (size_type b = 0; b < mtx->get_num_batch_entries(); ++b) { - data[b] = {mtx->get_size().at(b), {}}; + data[b] = {mtx->get_common_size(), {}}; for (size_type row = 0; row < data[b].size[0]; ++row) { for (size_type col = 0; col < data[b].size[1]; ++col) { if (tmp->at(b, row, col) != From 63743596a0bcd622fdc1d0f7393d3d3df6767054 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 9 Jul 2023 23:14:27 +0200 Subject: [PATCH 111/583] Fix kernels and batch_struct. 
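For reference, filling a BatchMultiVector from matrix_data with the common-size layout used here might look as follows (a hedged sketch, not code from the patch; exec is assumed to be an existing Executor):

    std::vector<gko::matrix_data<double>> data(2);
    data[0] = gko::matrix_data<double>(gko::dim<2>{2, 2}, 1.0);
    data[1] = gko::matrix_data<double>(gko::dim<2>{2, 2}, 2.0);
    auto mv = gko::BatchMultiVector<double>::create(exec);
    mv->read(data);  // all entries must share the same size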
--- core/base/batch_struct.hpp | 27 +- cuda/base/batch_multi_vector_kernels.cu | 20 +- cuda/base/batch_struct.hpp | 24 +- dpcpp/CMakeLists.txt | 1 + dpcpp/base/batch_multi_vector_kernels.dp.cpp | 232 +++++++++++++++ dpcpp/base/batch_multi_vector_kernels.hpp.inc | 168 +++++++++++ hip/base/batch_multi_vector_kernels.hip.cpp | 20 +- hip/base/batch_struct.hip.hpp | 22 +- .../ginkgo/core/base/batch_lin_op_helpers.hpp | 126 +++++++++ .../ginkgo/core/base/batch_multi_vector.hpp | 4 +- include/ginkgo/ginkgo.hpp | 2 + omp/base/batch_multi_vector_kernels.cpp | 8 +- reference/base/batch_multi_vector_kernels.cpp | 4 +- .../base/batch_multi_vector_kernels.hpp.inc | 263 ------------------ reference/base/batch_struct.hpp | 22 +- 15 files changed, 605 insertions(+), 338 deletions(-) create mode 100644 dpcpp/base/batch_multi_vector_kernels.dp.cpp create mode 100644 dpcpp/base/batch_multi_vector_kernels.hpp.inc create mode 100644 include/ginkgo/core/base/batch_lin_op_helpers.hpp diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index 68fcdd9c8a0..05ac4f0d105 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -65,15 +65,16 @@ struct UniformBatch { using value_type = ValueType; using entry_type = BatchEntry; - ValueType* values; ///< Concatenated values of all matrices in the batch - size_type num_batch; ///< Number of matrices in the batch - size_type stride; ///< Common stride of each dense matrix - int num_rows; ///< Common number of rows in each matrix - int num_rhs; ///< Common number of columns of each matrix - int num_nnz; ///< Common number of non-zeros of each matrix, ie., - ///< the number or rows times the number of columns - - size_type get_entry_storage() const { return num_nnz * sizeof(value_type); } + ValueType* values; + size_type num_batch_entries; + size_type stride; + int num_rows; + int num_rhs; + + size_type get_entry_storage() const + { + return num_rows * stride * sizeof(value_type); + } }; @@ -95,14 +96,15 @@ template GKO_ATTRIBUTES GKO_INLINE gko::batch_multi_vector::UniformBatch to_const(const gko::batch_multi_vector::UniformBatch& ub) { - return {ub.values, ub.num_batch, ub.stride, ub.num_rows, ub.num_rhs}; + return {ub.values, ub.num_batch_entries, ub.stride, ub.num_rows, + ub.num_rhs}; } /** * Extract one object (matrix, vector etc.) from a batch of objects * - * This overload is for batch dense matrices. + * This overload is for batch multi-vectors. * These overloads are intended to be called from within a kernel. 
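 * For example (illustration only; the names x_ub and batch_id are
 * assumptions), a kernel body might fetch its entry as
 *
 *   const auto x_b = batch::batch_entry(x_ub, batch_id);
 *
 * and then address x_b.values using x_b.stride, x_b.num_rows and
 * x_b.num_rhs.
 *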
* * @param batch The batch of objects to extract from @@ -136,8 +138,7 @@ GKO_ATTRIBUTES GKO_INLINE ValueType* batch_entry_ptr( } // namespace batch - - } // namespace gko + #endif // GKO_CORE_BASE_BATCH_STRUCT_HPP_ diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index 039ab94b767..df5aa9149a5 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -67,7 +67,7 @@ constexpr int sm_multiplier = 4; template -void scale(std::shared_ptr exec, +void scale(std::shared_ptr exec, const BatchMultiVector* const alpha, BatchMultiVector* const x) { @@ -82,16 +82,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void add_scaled(std::shared_ptr exec, +void add_scaled(std::shared_ptr exec, const BatchMultiVector* const alpha, const BatchMultiVector* const x, BatchMultiVector* const y) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const size_type nrhs = x->get_size().at(0)[1]; + const size_type nrhs = x->get_common_size()[1]; if (nrhs == 1) { const auto num_batch = x->get_num_batch_entries(); - const auto num_rows = x->get_size().at(0)[0]; + const auto num_rows = x->get_common_size()[0]; single_add_scaled<<>>( num_batch, num_rows, as_cuda_type(alpha->get_const_values()), as_cuda_type(x->get_const_values()), as_cuda_type(y->get_values())); @@ -108,15 +108,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void compute_dot(std::shared_ptr exec, +void compute_dot(std::shared_ptr exec, const BatchMultiVector* x, const BatchMultiVector* y, BatchMultiVector* result) { const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_size().at()[1]; + const auto num_rhs = x->get_common_size()[1]; if (num_rhs == 1) { - const auto num_rows = x->get_size().at()[0]; + const auto num_rows = x->get_common_size()[0]; single_compute_dot_product<<>>( num_blocks, num_rows, as_cuda_type(x->get_const_values()), as_cuda_type(y->get_const_values()), @@ -135,14 +135,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void compute_norm2(std::shared_ptr exec, +void compute_norm2(std::shared_ptr exec, const BatchMultiVector* const x, BatchMultiVector>* const result) { const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_size().at()[1]; + const auto num_rhs = x->get_common_size()[1]; if (num_rhs == 1) { - const auto num_rows = x->get_size().at()[0]; + const auto num_rows = x->get_common_size()[0]; single_compute_norm2<<>>( num_blocks, num_rows, as_cuda_type(x->get_const_values()), as_cuda_type(result->get_values())); diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 0bd9bd6dc40..5db50064e2f 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -70,10 +70,10 @@ get_batch_struct(const BatchMultiVector* const op) return { as_cuda_type(op->get_const_values()), op->get_num_batch_entries(), - op->get_stride().at(0), - static_cast(op->get_size().at(0)[0]), - static_cast(op->get_size().at(0)[1]), - static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0] * op->get_common_size()[1])}; } /** @@ -86,10 +86,10 @@ get_batch_struct(BatchMultiVector* const op) return { as_cuda_type(op->get_values()), op->get_num_batch_entries(), - op->get_stride().at(0), - static_cast(op->get_size().at(0)[0]), - static_cast(op->get_size().at(0)[1]), - static_cast(op->get_size().at(0)[0] * 
op->get_size().at(0)[1])}; + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0] * op->get_common_size()[1])}; } @@ -103,9 +103,9 @@ maybe_null_batch_struct(const BatchMultiVector* const op) { if (op) { return {as_cuda_type(op->get_const_values()), - op->get_num_batch_entries(), op->get_stride().at(0), - static_cast(op->get_size().at(0)[0]), - static_cast(op->get_size().at(0)[1])}; + op->get_num_batch_entries(), op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } else { return {nullptr, 0, 0, 0, 0}; } @@ -115,4 +115,6 @@ maybe_null_batch_struct(const BatchMultiVector* const op) } // namespace cuda } // namespace kernels } // namespace gko + + #endif // GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index b33b63d4af9..b70175c6b12 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -8,6 +8,7 @@ add_instantiation_files(${PROJECT_SOURCE_DIR}/common/unified matrix/dense_kernel add_library(ginkgo_dpcpp $ "") target_sources(ginkgo_dpcpp PRIVATE + base/batch_multi_vector_kernels.dp.cpp base/device_matrix_data_kernels.dp.cpp base/executor.dp.cpp base/helper.dp.cpp diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp new file mode 100644 index 00000000000..6101ed3da4d --- /dev/null +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -0,0 +1,232 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/base/batch_multi_vector_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include "core/components/prefix_sum_kernels.hpp" +#include "dpcpp/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The BatchMultiVector matrix format namespace. 
+ * @ref BatchMultiVector + * @ingroup batch_multi_vector + */ +namespace batch_multi_vector { + + +#include "dpcpp/base/batch_multi_vector_kernels.hpp.inc" + + +template +void scale(std::shared_ptr exec, + const BatchMultiVector* const alpha, + BatchMultiVector* const x) +{ + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + + const auto num_batches = x_ub.num_batch_entries; + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batches); + + // Launch a kernel that has nbatches blocks, each block has max group size + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto alpha_b = batch::batch_entry(alpha_ub, group_id); + const auto x_b = batch::batch_entry(x_ub, group_id); + single_scale_kernel(alpha_b, x_b, item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); + + +template +void add_scaled(std::shared_ptr exec, + const BatchMultiVector* const alpha, + const BatchMultiVector* const x, + BatchMultiVector* const y) +{ + const size_type num_rows = x->get_common_size()[0]; + const size_type num_cols = x->get_common_size()[1]; + + const auto num_batches = x->get_num_batch_entries(); + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batches); + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto alpha_b = batch::batch_entry(alpha_ub, group_id); + const auto x_b = batch::batch_entry(x_ub, group_id); + const auto y_b = batch::batch_entry(y_ub, group_id); + add_scaled_kernel(alpha_b, x_b, y_b, item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); + + +template +void compute_dot(std::shared_ptr exec, + const BatchMultiVector* const x, + const BatchMultiVector* const y, + BatchMultiVector* const result) +{ + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + const auto res_ub = get_batch_struct(result); + + const auto num_batches = x_ub.num_batch_entries; + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batches); + + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::batch_entry(x_ub, group_id); + const auto y_b = batch::batch_entry(y_ub, group_id); + const auto res_b = batch::batch_entry(res_ub, group_id); + compute_dot_product_kernel(x_b, y_b, res_b, item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); + + +template +void compute_norm2(std::shared_ptr exec, + const BatchMultiVector* const x, + BatchMultiVector>* const result) +{ + const auto x_ub = get_batch_struct(x); + const auto 
res_ub = get_batch_struct(result); + + const auto num_batches = x_ub.num_batch_entries; + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batches); + + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::batch_entry(x_ub, group_id); + const auto res_b = batch::batch_entry(res_ub, group_id); + compute_norm2_kernel(x_b, res_b, item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); + + +template +void copy(std::shared_ptr exec, + const BatchMultiVector* x, + BatchMultiVector* result) +{ + const auto x_ub = get_batch_struct(x); + const auto result_ub = get_batch_struct(result); + + const auto num_batches = x_ub.num_batch_entries; + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batches); + + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::batch_entry(x_ub, group_id); + const auto result_b = batch::batch_entry(result_ub, group_id); + copy_kernel(x_b, result_b, item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); + + +} // namespace batch_multi_vector +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc new file mode 100644 index 00000000000..7ea25fb4c22 --- /dev/null +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -0,0 +1,168 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +/** + * Copies the values of vector into another. + * + * @param num_rows Length of vector. + * @param in Vector to copy from. + * @param out Vector to copy into. + */ +template +__dpct_inline__ void copy_kernel(const int num_rows, + const ValueType* const __restrict__ in, + ValueType* const __restrict__ out, + sycl::nd_item<3> item_ct1) +{ + for (int iz = item_ct1.get_local_linear_id(); iz < num_rows; + iz += item_ct1.get_local_range().size()) { + out[iz] = in[iz]; + } +} + +/** + * Adds a scaled vector to another. + * + * @param num_rows Common length of both vectors. + * @param alpha Scaling factor. + * @param[in] x Vector to scale and add. + * @param[in,out] y Vector to add to. + */ +template +__dpct_inline__ void add_scaled_kernel(const int num_rows, + const ValueType alpha, + const ValueType* const __restrict__ x, + ValueType* const __restrict__ y, + sycl::nd_item<3> item_ct1) +{ + for (int li = item_ct1.get_local_linear_id(); li < num_rows; + li += item_ct1.get_local_range().size()) { + y[li] += alpha * x[li]; + } +} + +/** + * Computes the 2-norm of a vector in global or shared memory. + * + * @param x A row-major vector (only 1 column). + * @param result Norm value. + */ +template +__dpct_inline__ void compute_norm2_sg_kernel( + const int num_rows, const ValueType* const __restrict__ x, + gko::remove_complex& result, sycl::nd_item<3> item_ct1) +{ + const auto sg = item_ct1.get_sub_group(); + const auto sg_size = sg.get_local_range().size(); + const auto sg_tid = sg.get_local_id(); + + using real_type = typename gko::remove_complex; + real_type val = zero(); + + for (int r = sg_tid; r < num_rows; r += sg_size) { + val += squared_norm(x[r]); + } + + val = sycl::reduce_over_group(sg, val, sycl::plus<>()); + + if (sg_tid == 0) { + result = sqrt(val); + } +} + +template +__dpct_inline__ void compute_norm2_kernel( + const int num_rows, const ValueType* const __restrict__ x, + gko::remove_complex& result, sycl::nd_item<3> item_ct1) +{ + const auto group = item_ct1.get_group(); + const auto group_size = item_ct1.get_local_range().size(); + const auto tid = item_ct1.get_local_linear_id(); + + using real_type = typename gko::remove_complex; + real_type val = zero(); + + for (int r = tid; r < num_rows; r += group_size) { + val += squared_norm(x[r]); + } + + val = sycl::reduce_over_group(group, val, sycl::plus<>()); + + result = sqrt(val); +} + + +/** + * Computes the dot product of some column vectors in global or shared memory. + * + * @param result Holds dot product value for vector in x and y. 
+ */ +template +__dpct_inline__ void compute_dot_product_sg_kernel( + const int num_rows, const ValueType* const __restrict__ x, + const ValueType* const __restrict__ y, ValueType& result, + sycl::nd_item<3> item_ct1) +{ + const auto sg = item_ct1.get_sub_group(); + const auto sg_size = sg.get_local_range().size(); + const auto sg_tid = sg.get_local_id(); + + ValueType val = zero(); + + for (int r = sg_tid; r < num_rows; r += sg_size) { + val += conj(x[r]) * y[r]; + } + + val = sycl::reduce_over_group(sg, val, sycl::plus<>()); + + if (sg_tid == 0) { + result = val; + } +} + +template +__dpct_inline__ void compute_dot_product_kernel( + const int num_rows, const ValueType* const __restrict__ x, + const ValueType* const __restrict__ y, ValueType& result, + sycl::nd_item<3> item_ct1) +{ + const auto group = item_ct1.get_group(); + const auto group_size = item_ct1.get_local_range().size(); + const auto tid = item_ct1.get_local_linear_id(); + + ValueType val = zero(); + + for (int r = tid; r < num_rows; r += group_size) { + val += conj(x[r]) * y[r]; + } + result = sycl::reduce_over_group(group, val, sycl::plus<>()); +} diff --git a/hip/base/batch_multi_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp index 01a443558e9..2a6c3085772 100644 --- a/hip/base/batch_multi_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -70,7 +70,7 @@ constexpr int sm_multiplier = 4; template -void scale(std::shared_ptr exec, +void scale(std::shared_ptr exec, const BatchMultiVector* const alpha, BatchMultiVector* const x) { @@ -86,16 +86,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void add_scaled(std::shared_ptr exec, +void add_scaled(std::shared_ptr exec, const BatchMultiVector* const alpha, const BatchMultiVector* const x, BatchMultiVector* const y) { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const size_type nrhs = x->get_size().at(0)[1]; + const size_type nrhs = x->get_common_size()[1]; if (nrhs == 1) { const auto num_batch = x->get_num_batch_entries(); - const auto num_rows = x->get_size().at(0)[0]; + const auto num_rows = x->get_common_size()[0]; hipLaunchKernelGGL( single_add_scaled, dim3(num_blocks), dim3(default_block_size), 0, 0, num_batch, num_rows, as_hip_type(alpha->get_const_values()), @@ -115,15 +115,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void compute_dot(std::shared_ptr exec, +void compute_dot(std::shared_ptr exec, const BatchMultiVector* x, const BatchMultiVector* y, BatchMultiVector* result) { const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_size().at()[1]; + const auto num_rhs = x->get_common_size()[1]; if (num_rhs == 1) { - const auto num_rows = x->get_size().at()[0]; + const auto num_rows = x->get_common_size()[0]; hipLaunchKernelGGL(single_compute_dot_product, dim3(num_blocks), dim3(default_block_size), 0, 0, num_blocks, num_rows, as_hip_type(x->get_const_values()), @@ -144,14 +144,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void compute_norm2(std::shared_ptr exec, +void compute_norm2(std::shared_ptr exec, const BatchMultiVector* const x, BatchMultiVector>* const result) { const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_size().at()[1]; + const auto num_rhs = x->get_common_size()[1]; if (num_rhs == 1) { - const auto num_rows = x->get_size().at()[0]; + const auto num_rows = x->get_common_size()[0]; hipLaunchKernelGGL(single_compute_norm2, dim3(num_blocks), dim3(default_block_size), 0, 0, num_blocks, num_rows, 
as_hip_type(x->get_const_values()), diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index 214039f060b..c921e55d857 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -70,10 +70,10 @@ get_batch_struct(const BatchMultiVector* const op) return { as_hip_type(op->get_const_values()), op->get_num_batch_entries(), - op->get_stride().at(0), - static_cast(op->get_size().at(0)[0]), - static_cast(op->get_size().at(0)[1]), - static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0] * op->get_common_size()[1])}; } /** @@ -86,10 +86,10 @@ get_batch_struct(BatchMultiVector* const op) return { as_hip_type(op->get_values()), op->get_num_batch_entries(), - op->get_stride().at(0), - static_cast(op->get_size().at(0)[0]), - static_cast(op->get_size().at(0)[1]), - static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0] * op->get_common_size()[1])}; } @@ -103,9 +103,9 @@ maybe_null_batch_struct(const BatchMultiVector* const op) { if (op) { return {as_hip_type(op->get_const_values()), - op->get_num_batch_entries(), op->get_stride().at(0), - static_cast(op->get_size().at(0)[0]), - static_cast(op->get_size().at(0)[1])}; + op->get_num_batch_entries(), op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } else { return {nullptr, 0, 0, 0, 0}; } diff --git a/include/ginkgo/core/base/batch_lin_op_helpers.hpp b/include/ginkgo/core/base/batch_lin_op_helpers.hpp new file mode 100644 index 00000000000..ecb8bcc4556 --- /dev/null +++ b/include/ginkgo/core/base/batch_lin_op_helpers.hpp @@ -0,0 +1,126 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ +#define GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ + + +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { + + +/** + * A BatchLinOp implementing this interface can read its data from a matrix_data + * structure. + * + * @ingroup BatchLinOp + */ +template +class BatchReadableFromMatrixData { +public: + using value_type = ValueType; + using index_type = IndexType; + + virtual ~BatchReadableFromMatrixData() = default; + + /** + * Reads a batch matrix from a std::vector of matrix_data objects. + * + * @param data the std::vector of matrix_data objects + */ + virtual void read( + const std::vector>& data) = 0; + + /** + * Reads a matrix from a std::vector of matrix_assembly_data objects. + * + * @param data the std::vector of matrix_assembly_data objects + */ + void read(const std::vector>& + assembly_data) + { + auto mat_data = std::vector>( + assembly_data.size()); + size_type ind = 0; + for (const auto& i : assembly_data) { + mat_data[ind] = i.get_ordered_data(); + ++ind; + } + this->read(mat_data); + } +}; + + +/** + * A BatchLinOp implementing this interface can write its data to a std::vector + * of matrix_data objects. + * + * @ingroup BatchLinOp + */ +template +class BatchWritableToMatrixData { +public: + using value_type = ValueType; + using index_type = IndexType; + + virtual ~BatchWritableToMatrixData() = default; + + /** + * Writes a matrix to a matrix_data structure. + * + * @param data the matrix_data structure + */ + virtual void write( + std::vector>& data) const = 0; +}; + + +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 9513272648d..c6614df0d66 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -461,9 +462,6 @@ class BatchMultiVector * @param exec Executor associated to the vector * @param size sizes of the batch matrices in a batch_dim object * @param values array of matrix values - * @param strides stride of the rows (i.e. offset between the first - * elements of two consecutive rows, expressed as the - * number of matrix elements) * * @note If `values` is not an rvalue, not an array of ValueType, or is on * the wrong executor, an internal copy will be created, and the diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index d73bf669700..8a88bf003f8 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -39,6 +39,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include +#include #include #include #include diff --git a/omp/base/batch_multi_vector_kernels.cpp b/omp/base/batch_multi_vector_kernels.cpp index 96b6716f0ba..6dd8b38e6d8 100644 --- a/omp/base/batch_multi_vector_kernels.cpp +++ b/omp/base/batch_multi_vector_kernels.cpp @@ -60,7 +60,7 @@ namespace batch_multi_vector { template -void scale(std::shared_ptr exec, +void scale(std::shared_ptr exec, const BatchMultiVector* const alpha, BatchMultiVector* const x) { @@ -79,7 +79,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void add_scaled(std::shared_ptr exec, +void add_scaled(std::shared_ptr exec, const BatchMultiVector* const alpha, const BatchMultiVector* const x, BatchMultiVector* const y) @@ -101,7 +101,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void compute_dot(std::shared_ptr exec, +void compute_dot(std::shared_ptr exec, const BatchMultiVector* const x, const BatchMultiVector* const y, BatchMultiVector* const result) @@ -124,7 +124,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template -void compute_norm2(std::shared_ptr exec, +void compute_norm2(std::shared_ptr exec, const BatchMultiVector* const x, BatchMultiVector>* const result) { diff --git a/reference/base/batch_multi_vector_kernels.cpp b/reference/base/batch_multi_vector_kernels.cpp index 27f6539b9eb..31e10fbe22f 100644 --- a/reference/base/batch_multi_vector_kernels.cpp +++ b/reference/base/batch_multi_vector_kernels.cpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_struct.hpp" -#include "reference/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" namespace gko { @@ -56,7 +56,7 @@ namespace reference { namespace batch_multi_vector { -#include "reference/matrix/batch_multi_vector_kernels.hpp.inc" +#include "reference/base/batch_multi_vector_kernels.hpp.inc" template diff --git a/reference/base/batch_multi_vector_kernels.hpp.inc b/reference/base/batch_multi_vector_kernels.hpp.inc index 2f9c88e53f1..3cda19cfc06 100644 --- a/reference/base/batch_multi_vector_kernels.hpp.inc +++ b/reference/base/batch_multi_vector_kernels.hpp.inc @@ -30,63 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -template -inline void matvec_kernel( - const gko::batch_multi_vector::BatchEntry& a, - const gko::batch_multi_vector::BatchEntry& b, - const gko::batch_multi_vector::BatchEntry& c) -{ - for (int row = 0; row < c.num_rows; ++row) { - for (int col = 0; col < c.num_rhs; ++col) { - c.values[row * c.stride + col] = gko::zero(); - } - } - - for (int row = 0; row < c.num_rows; ++row) { - for (int inner = 0; inner < a.num_rhs; ++inner) { - for (int col = 0; col < c.num_rhs; ++col) { - c.values[row * c.stride + col] += - a.values[row * a.stride + inner] * - b.values[inner * b.stride + col]; - } - } - } -} - - -template -inline void advanced_matvec_kernel( - const ValueType alpha, - const gko::batch_multi_vector::BatchEntry& a, - const gko::batch_multi_vector::BatchEntry& b, - const ValueType beta, - const gko::batch_multi_vector::BatchEntry& c) -{ - if (beta != gko::zero()) { - for (int row = 0; row < c.num_rows; ++row) { - for (int col = 0; col < c.num_rhs; ++col) { - c.values[row * c.stride + col] *= beta; - } - } - } else { - for (int row = 0; row < c.num_rows; ++row) { - for (int col = 0; col < c.num_rhs; ++col) { - c.values[row * c.stride + col] *= gko::zero(); - } - } - } - - for (int row = 0; row < c.num_rows; ++row) { - for (int inner = 0; inner < a.num_rhs; ++inner) { - for (int col = 0; col < c.num_rhs; ++col) { - c.values[row * c.stride + col] += - alpha * a.values[row * a.stride + inner] * - b.values[inner * b.stride + col]; - } - } - } -} - template inline void scale( @@ -133,33 +76,6 @@ inline void add_scaled( } -template -inline void add_scale( - const gko::batch_multi_vector::BatchEntry& alpha, - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& beta, - const gko::batch_multi_vector::BatchEntry& y) -{ - if (alpha.num_rhs == 1) { - for (int i = 0; i < x.num_rows; ++i) { - for (int j = 0; j < x.num_rhs; ++j) { - y.values[i * y.stride + j] = - alpha.values[0] * x.values[i * x.stride + j] + - beta.values[0] * y.values[i * y.stride + j]; - } - } - } else { - for (int i = 0; i < x.num_rows; ++i) { - for (int j = 0; j < x.num_rhs; ++j) { - y.values[i * y.stride + j] = - alpha.values[j] * x.values[i * x.stride + j] + - beta.values[j] * y.values[i * y.stride + j]; - } - } - } -} - - template inline void compute_norm2( const gko::batch_multi_vector::BatchEntry& x, @@ -180,39 +96,6 @@ inline void compute_norm2( } -/** - * Multiplies with a diagonal matrix represented as a dense vector. - * - * @param[in] diag_vec The entries of the diagonal matrix. - * @param[in,out] a The dense matrix or vectors to scale. - */ -template -inline void batch_scale( - const gko::batch_multi_vector::BatchEntry& diag_vec, - const gko::batch_multi_vector::BatchEntry& a) -{ - for (int i_row = 0; i_row < a.num_rows; i_row++) { - const ValueType scale = diag_vec.values[i_row]; - for (int j = 0; j < a.num_rhs; j++) { - a.values[i_row * a.stride + j] *= scale; - } - } -} - -template -inline void batch_scale(const int nrows, const int ncols, - const size_type a_stride, const ValueType* const left, - const ValueType* const right, ValueType* const a) -{ - for (int i_row = 0; i_row < nrows; i_row++) { - const ValueType scale = left[i_row]; - for (int j = 0; j < ncols; j++) { - a[i_row * a_stride + j] *= scale * right[j]; - } - } -} - - /** * Copies the values of one multi-vector into another. 
* @@ -248,149 +131,3 @@ inline void compute_dot_product( } } } - - -template -inline void copy( - const gko::batch_multi_vector::BatchEntry& source_entry, - const gko::batch_multi_vector::BatchEntry& destination_entry, - const gko::uint32& converged) -{ - for (int r = 0; r < source_entry.num_rows; r++) { - for (int c = 0; c < source_entry.num_rhs; c++) { - const gko::uint32 conv = converged & (1 << c); - - if (conv) { - continue; - } - - destination_entry.values[r * destination_entry.stride + c] = - source_entry.values[r * source_entry.stride + c]; - } - } -} - - -template -inline void add_scaled( - const gko::batch_multi_vector::BatchEntry& alpha, - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& y, - const gko::uint32& converged) -{ - if (alpha.num_rhs == 1) { - for (int i = 0; i < x.num_rows; ++i) { - for (int j = 0; j < x.num_rhs; ++j) { - const gko::uint32 conv = converged & (1 << j); - - if (conv) { - continue; - } - - y.values[i * y.stride + j] += - alpha.values[0] * x.values[i * x.stride + j]; - } - } - } else { - for (int i = 0; i < x.num_rows; ++i) { - for (int j = 0; j < x.num_rhs; ++j) { - const gko::uint32 conv = converged & (1 << j); - - if (conv) { - continue; - } - - - y.values[i * y.stride + j] += - alpha.values[j] * x.values[i * x.stride + j]; - } - } - } -} - - -template -inline void compute_norm2( - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry>& - result, - const gko::uint32& converged) -{ - for (int j = 0; j < x.num_rhs; ++j) { - const gko::uint32 conv = converged & (1 << j); - - if (conv) { - continue; - } - - result.values[j] = gko::zero>(); - } - for (int i = 0; i < x.num_rows; ++i) { - for (int j = 0; j < x.num_rhs; ++j) { - const gko::uint32 conv = converged & (1 << j); - - if (conv) { - continue; - } - - result.values[j] += squared_norm(x.values[i * x.stride + j]); - } - } - for (int j = 0; j < x.num_rhs; ++j) { - const gko::uint32 conv = converged & (1 << j); - - if (conv) { - continue; - } - - result.values[j] = sqrt(result.values[j]); - } -} - - -template -inline void compute_dot_product( - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& y, - const gko::batch_multi_vector::BatchEntry& result, - const gko::uint32& converged) -{ - for (int c = 0; c < result.num_rhs; c++) { - const gko::uint32 conv = converged & (1 << c); - - if (conv) { - continue; - } - - result.values[c] = gko::zero(); - } - - for (int r = 0; r < x.num_rows; r++) { - for (int c = 0; c < x.num_rhs; c++) { - const gko::uint32 conv = converged & (1 << c); - - if (conv) { - continue; - } - - result.values[c] += - conj(x.values[r * x.stride + c]) * y.values[r * y.stride + c]; - } - } -} - - -template -inline void add_scaled_identity( - const ValueType& a, const ValueType& b, - const gko::batch_multi_vector::BatchEntry& mat) -{ - for (int i = 0; i < mat.num_rows; i++) { - for (int j = 0; j < mat.num_rhs; j++) { - mat.values[i * mat.stride + j] *= b; - if (i == j) { - mat.values[i * mat.stride + i] += a; - } - } - } -} diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index 32c90db9d7f..bb492488b28 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -69,10 +69,10 @@ inline gko::batch_multi_vector::UniformBatch get_batch_struct( return { op->get_const_values(), op->get_num_batch_entries(), - op->get_stride().at(0), - static_cast(op->get_size().at(0)[0]), - static_cast(op->get_size().at(0)[1]), - 
static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0] * op->get_common_size()[1])}; } @@ -86,10 +86,10 @@ inline gko::batch_multi_vector::UniformBatch get_batch_struct( return { op->get_values(), op->get_num_batch_entries(), - op->get_stride().at(0), - static_cast(op->get_size().at(0)[0]), - static_cast(op->get_size().at(0)[1]), - static_cast(op->get_size().at(0)[0] * op->get_size().at(0)[1])}; + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0] * op->get_common_size()[1])}; } @@ -103,9 +103,9 @@ maybe_null_batch_struct(const BatchMultiVector* const op) { if (op) { return {op->get_const_values(), op->get_num_batch_entries(), - op->get_stride().at(0), - static_cast(op->get_size().at(0)[0]), - static_cast(op->get_size().at(0)[1])}; + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } else { return {nullptr, 0, 0, 0, 0}; } From c7bc6998d5cc8cdebc7e28e6202ea57f9a80bec0 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 11 Jul 2023 07:16:28 +0200 Subject: [PATCH 112/583] Minor typos and fixes --- core/base/batch_multi_vector_kernels.hpp | 3 ++ include/ginkgo/core/base/batch_dim.hpp | 4 +-- .../ginkgo/core/base/batch_multi_vector.hpp | 32 ++++++++++--------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/core/base/batch_multi_vector_kernels.hpp b/core/base/batch_multi_vector_kernels.hpp index 34da4ce4c2f..7e7f9c3bb37 100644 --- a/core/base/batch_multi_vector_kernels.hpp +++ b/core/base/batch_multi_vector_kernels.hpp @@ -42,6 +42,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/base/kernel_declaration.hpp" + + namespace gko { namespace kernels { diff --git a/include/ginkgo/core/base/batch_dim.hpp b/include/ginkgo/core/base/batch_dim.hpp index 3e650745a50..bc17648be52 100644 --- a/include/ginkgo/core/base/batch_dim.hpp +++ b/include/ginkgo/core/base/batch_dim.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_PUBLIC_CORE_BASE_DIM_HPP_ -#define GKO_PUBLIC_CORE_BASE_DIM_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_BATCH_DIM_HPP_ +#define GKO_PUBLIC_CORE_BASE_BATCH_DIM_HPP_ #include diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index c6614df0d66..1050ec28224 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -39,7 +39,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include +#include #include #include #include @@ -82,7 +84,7 @@ class BatchMultiVector using value_type = ValueType; using index_type = int32; - using unbatch_type = Dense; + using unbatch_type = matrix::Dense; using mat_data = gko::matrix_data; using mat_data32 = gko::matrix_data; using absolute_type = remove_complex>; @@ -147,14 +149,14 @@ class BatchMultiVector * * @return the batch size */ - batch_dim<2> get_size() { return batch_size_; } + batch_dim<2> get_size() const { return batch_size_; } /** * Returns the number of batch entries. 
* * @return the number of batch entries */ - size_type get_num_batch_entries() + size_type get_num_batch_entries() const { return batch_size_.get_num_batch_entries(); } @@ -164,7 +166,7 @@ class BatchMultiVector * * @return the common size stored */ - dim<2> get_common_size() { return batch_size_.get_common_size(); } + dim<2> get_common_size() const { return batch_size_.get_common_size(); } /** * Returns a pointer to the array of values of the vector. @@ -418,13 +420,13 @@ class BatchMultiVector private: inline batch_dim<2> compute_batch_size( - const std::vector*>& matrices) + const std::vector*>& matrices) { auto common_size = matrices[0]->get_size(); for (int i = 1; i < matrices.size(); ++i) { GKO_ASSERT_EQ(common_size, matrices[i]->get_size()); } - return batch_dim<2>{num_entries, common_size}; + return batch_dim<2>{matrices.size(), common_size}; } inline size_type compute_num_elems(const batch_dim<2>& size) @@ -486,10 +488,10 @@ class BatchMultiVector * @param matrices The matrices that need to be batched. */ BatchMultiVector(std::shared_ptr exec, - const std::vector*>& matrices) + const std::vector*>& matrices) : EnableAbstractPolymorphicObject(exec), batch_size_{compute_batch_size(matrices)}, - values(exec, compute_num_elems(batch_size_)) + values_(exec, compute_num_elems(batch_size_)) { for (size_type i = 0; i < this->get_num_batch_entries(); ++i) { auto local_exec = matrices[i]->get_executor(); @@ -510,7 +512,7 @@ class BatchMultiVector BatchMultiVector(std::shared_ptr exec, size_type num_duplications, const BatchMultiVector* input) - : EnableBatchMultiVector( + : BatchMultiVector( exec, gko::batch_dim<2>( input->get_num_batch_entries() * num_duplications, input->get_common_size())) @@ -532,8 +534,9 @@ class BatchMultiVector * @param input the vector to be duplicated. */ BatchMultiVector(std::shared_ptr exec, - size_type num_duplications, const Dense* input) - : EnableBatchMultiVector( + size_type num_duplications, + const matrix::Dense* input) + : BatchMultiVector( exec, gko::batch_dim<2>(num_duplications, input->get_size())) { size_type offset = 0; @@ -639,10 +642,10 @@ std::unique_ptr batch_initialize( std::shared_ptr exec, TArgs&&... 
create_args) { using batch_multi_vector = BatchMultiVector; - size_type common_num_rows = vals_begin->size(); - size_type common_size = dim<2>(common_num_rows, 1); - dim<2> common_size; + size_type num_batch_entries = vals.size(); auto vals_begin = begin(vals); + size_type common_num_rows = vals_begin->size(); + auto common_size = dim<2>(common_num_rows, 1); for (size_type b = 0; b < num_batch_entries; ++b) { GKO_ASSERT_EQ(common_num_rows, vals_begin->size()); vals_begin++; @@ -693,7 +696,6 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = vals.size(); - auto vals_begin = begin(vals); size_type common_num_rows = vals_begin->size(); size_type common_num_cols = begin(vals_begin)->size(); From 848461eed080285831eb570513e8ecacb57438aa Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 11 Jul 2023 17:09:12 +0200 Subject: [PATCH 113/583] Update cuda/hip kernels --- .../base/batch_multi_vector_kernels.hpp.inc | 234 ++++++++++-------- core/device_hooks/common_kernels.inc.cpp | 1 + cuda/base/batch_struct.hpp | 22 +- hip/base/batch_struct.hip.hpp | 22 +- reference/base/batch_struct.hpp | 22 +- 5 files changed, 156 insertions(+), 145 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index 0eb86996c81..0ef0408674a 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -32,165 +32,193 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /** - * Copies the values of vector into another. - * - * @param num_rows Length of vector. - * @param in Vector to copy from. - * @param out Vector to copy into. + * Scales the vectors in global or shared memory with a factor of alpha (alpha + * is in global memory or shared memory) */ template -__device__ __forceinline__ void single_copy(const int num_rows, - const ValueType* const in, - ValueType* const out) +__device__ __forceinline__ void scale(const BatchEntry& alpha, + const BatchEntry& x) { - for (int iz = threadIdx.x; iz < num_rows; iz += blockDim.x) { - out[iz] = in[iz]; + const int max_li = x.num_rows * x.num_rhs; + for (int li = threadIdx.x; li < max_li; li += blockDim.x) { + const int row = li / x.num_rhs; + const int col = li % x.num_rhs; + + if (alpha.num_rhs == 1) { + x.values[row * x.stride + col] = + alpha.values[0] * x.values[row * x.stride + col]; + } else { + x.values[row * x.stride + col] = + alpha.values[col] * x.values[row * x.stride + col]; + } } } template -__global__ __launch_bounds__(default_block_size) void single_copy( - const size_type num_batch, const int num_rows, - const ValueType* const __restrict__ in, ValueType* const __restrict__ out) +__global__ __launch_bounds__(default_block_size, sm_multiplier) void scale( + const gko::batch_dense::UniformBatch alpha, + const gko::batch_dense::UniformBatch x) { - for (size_type ibatch = blockIdx.x; ibatch < num_batch; + for (size_type ibatch = blockIdx.x; ibatch < x.num_batch; ibatch += gridDim.x) { - const auto in_b = gko::batch::batch_entry_ptr(in, 1, num_rows, ibatch); - const auto out_b = - gko::batch::batch_entry_ptr(out, 1, num_rows, ibatch); - single_copy(num_rows, in_b, out_b); + const auto alpha_b = gko::batch::batch_entry(alpha, ibatch); + const auto x_b = gko::batch::batch_entry(x, ibatch); + scale(alpha_b, x_b); } } -/** - * Adds a scaled vector to another. - * - * @param num_rows Common length of both vectors. 
- * @param alpha Scaling factor. - * @param[in] x Vector to scale and add. - * @param[in,out] y Vector to add to. - */ template -__device__ __forceinline__ void single_add_scaled(const int num_rows, - const ValueType alpha, - const ValueType* const x, - ValueType* const y) +__device__ __forceinline__ void add_scaled( + const gko::batch_dense::BatchEntry& alpha, + const gko::batch_dense::BatchEntry& x, + const gko::batch_dense::BatchEntry& y) { - for (int li = threadIdx.x; li < num_rows; li += blockDim.x) { - y[li] += alpha * x[li]; + const int max_li = x.num_rows * x.num_rhs; + for (int li = threadIdx.x; li < max_li; li += blockDim.x) { + const int row = li / x.num_rhs; + const int col = li % x.num_rhs; + + if (alpha.num_rhs == 1) { + y.values[row * y.stride + col] += + alpha.values[0] * x.values[row * x.stride + col]; + } else { + y.values[row * y.stride + col] += + alpha.values[col] * x.values[row * x.stride + col]; + } } } template -__global__ __launch_bounds__(default_block_size) void single_add_scaled( - const size_type num_batch, const int num_rows, - const ValueType* const __restrict__ alpha, - const ValueType* const __restrict__ x, ValueType* const __restrict__ y) +__global__ __launch_bounds__(default_block_size, sm_multiplier) void add_scaled( + const gko::batch_dense::UniformBatch alpha, + const gko::batch_dense::UniformBatch x, + const gko::batch_dense::UniformBatch y) { - for (size_type ibatch = blockIdx.x; ibatch < num_batch; + for (size_type ibatch = blockIdx.x; ibatch < x.num_batch; ibatch += gridDim.x) { - const auto x_b = gko::batch::batch_entry_ptr(x, 1, num_rows, ibatch); - const auto y_b = gko::batch::batch_entry_ptr(y, 1, num_rows, ibatch); - single_add_scaled(num_rows, alpha[0], x_b, y_b); + const auto alpha_b = gko::batch::batch_entry(alpha, ibatch); + const auto x_b = gko::batch::batch_entry(x, ibatch); + const auto y_b = gko::batch::batch_entry(y, ibatch); + add_scaled(alpha_b, x_b, y_b); } } /** - * Computes the 2-norm of a vector in global or shared memory. + * Computes the dot product of some column vectors in global or shared memory. * - * @param x A row-major vector (only 1 column). - * @param result Norm value. + * @param result Holds dot product value for vector in x and y. 
*/ template -__device__ __forceinline__ void single_compute_norm2( - group::thread_block_tile& warp_grp, const int num_rows, - const ValueType* const x, remove_complex& result) +__device__ __forceinline__ void compute_dot_product( + const BatchEntry& x, const BatchEntry& y, + const BatchEntry& result) { - using real_type = typename gko::remove_complex; - real_type val = zero(); - - for (int r = warp_grp.thread_rank(); r < num_rows; r += warp_grp.size()) { - val += squared_norm(x[r]); - } - - // warp level reduction -#pragma unroll - for (int j = config::warp_size / 2; j > 0; j /= 2) { - val += warp_grp.shfl_down(val, j); - } - - if (warp_grp.thread_rank() == 0) { - result = sqrt(val); + constexpr auto tile_size = config::warp_size; + auto thread_block = group::this_thread_block(); + auto subwarp_grp = group::tiled_partition(thread_block); + const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); + const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + + for (int rhs_index = subwarp_grp_id; rhs_index < x.num_rhs; + rhs_index += num_subwarp_grps_per_block) { + one_dot(x, y, rhs_index, result, subwarp_grp); } } template -__global__ __launch_bounds__(default_block_size) void single_compute_norm2( - const size_type num_batch, const int num_rows, - const ValueType* const __restrict__ x, - remove_complex* const __restrict__ result) +__global__ __launch_bounds__( + default_block_size, + sm_multiplier) void compute_dot_product(const gko::batch_dense:: + UniformBatch + x, + const gko::batch_dense:: + UniformBatch + y, + const gko::batch_dense:: + UniformBatch + result) { - auto warp_grp = - group::tiled_partition(group::this_thread_block()); - for (size_type ibatch = blockIdx.x; ibatch < num_batch; + for (size_type ibatch = blockIdx.x; ibatch < x.num_batch; ibatch += gridDim.x) { - const auto x_b = gko::batch::batch_entry_ptr(x, 1, num_rows, ibatch); - const auto r_b = gko::batch::batch_entry_ptr(result, 1, 1, ibatch); - if (threadIdx.x / config::warp_size == 0) { - single_compute_norm2(warp_grp, num_rows, x_b, r_b[0]); - } + const auto x_b = gko::batch::batch_entry(x, ibatch); + const auto y_b = gko::batch::batch_entry(y, ibatch); + const auto r_b = gko::batch::batch_entry(result, ibatch); + compute_dot_product(x_b, y_b, r_b); } } /** - * Computes the dot product of some column vectors in global or shared memory. + * Computes the 2-norms of some column vectors in global or shared memory. * - * @param result Holds dot product value for vector in x and y. + * @param x A row-major multivector with nrhs columns. + * @param result Holds norm value for each vector in x. 
*/ template -__device__ __forceinline__ void single_compute_dot_product( - group::thread_block_tile& warp_grp, const int num_rows, - const ValueType* const x, const ValueType* const y, ValueType& result) +__device__ __forceinline__ void compute_norm2( + const gko::batch_dense::BatchEntry& x, + const gko::batch_dense::BatchEntry>& result) { - ValueType val = zero(); - - for (int r = warp_grp.thread_rank(); r < num_rows; r += warp_grp.size()) { - val += conj(x[r]) * y[r]; + constexpr auto tile_size = config::warp_size; + auto thread_block = group::this_thread_block(); + auto subwarp_grp = group::tiled_partition(thread_block); + const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); + const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + + for (int rhs_index = subwarp_grp_id; rhs_index < x.num_rhs; + rhs_index += num_subwarp_grps_per_block) { + one_norm2(x, rhs_index, result, subwarp_grp); } +} + - // warp level reduction -#pragma unroll - for (int j = config::warp_size / 2; j > 0; j /= 2) { - val += warp_grp.shfl_down(val, j); +template +__global__ + __launch_bounds__(default_block_size, sm_multiplier) void compute_norm2( + const gko::batch_dense::UniformBatch x, + const gko::batch_dense::UniformBatch> result) +{ + for (size_type ibatch = blockIdx.x; ibatch < x.num_batch; + ibatch += gridDim.x) { + const auto x_b = gko::batch::batch_entry(x, ibatch); + const auto r_b = gko::batch::batch_entry(result, ibatch); + compute_norm2(x_b, r_b); } +} + - if (warp_grp.thread_rank() == 0) { - result = val; +/** + * Copies the values of one multi-vector into another. + * + * Note that the output multi-vector should already have memory allocated + * and stride set. + */ +template +__device__ __forceinline__ void copy( + const gko::batch_dense::BatchEntry& in, + const gko::batch_dense::BatchEntry& out) +{ + for (int iz = threadIdx.x; iz < in.num_rows * in.num_rhs; + iz += blockDim.x) { + const int i = iz / in.num_rhs; + const int j = iz % in.num_rhs; + out.values[i * out.stride + j] = in.values[i * in.stride + j]; } } -// clang-format off template -__global__ __launch_bounds__(default_block_size) -void single_compute_dot_product(const size_type num_batch, - const int num_rows, - const ValueType *const __restrict__ x, - const ValueType *const __restrict__ y, - ValueType *const __restrict__ result) -// clang-format on +__global__ __launch_bounds__(default_block_size, sm_multiplier) void copy( + const gko::batch_dense::UniformBatch src, + const gko::batch_dense::UniformBatch dst) { - auto warp_grp = - group::tiled_partition(group::this_thread_block()); - for (size_type ibatch = blockIdx.x; ibatch < num_batch; + for (size_type ibatch = blockIdx.x; ibatch < src.num_batch; ibatch += gridDim.x) { - const auto x_b = gko::batch::batch_entry_ptr(x, 1, num_rows, ibatch); - const auto y_b = gko::batch::batch_entry_ptr(y, 1, num_rows, ibatch); - const auto r_b = gko::batch::batch_entry_ptr(result, 1, 1, ibatch); - single_compute_dot_product(warp_grp, num_rows, x_b, y_b, r_b[0]); + const auto dst_b = gko::batch::batch_entry(dst, ibatch); + const auto src_b = gko::batch::batch_entry(src, ibatch); + copy(src_b, dst_b); } } diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 3fe1372558b..9ab79160394 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 #include
+#include "core/base/batch_multi_vector_kernels.hpp"
 #include "core/base/device_matrix_data_kernels.hpp"
 #include "core/base/index_set_kernels.hpp"
 #include "core/base/mixed_precision_types.hpp"
diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp
index 5db50064e2f..9d4eb436c16 100644
--- a/cuda/base/batch_struct.hpp
+++ b/cuda/base/batch_struct.hpp
@@ -67,13 +67,10 @@ template <typename ValueType>
 inline gko::batch_multi_vector::UniformBatch<const cuda_type<ValueType>>
 get_batch_struct(const BatchMultiVector<ValueType>* const op)
 {
-    return {
-        as_cuda_type(op->get_const_values()),
-        op->get_num_batch_entries(),
-        op->get_common_size()[1],
-        static_cast<int>(op->get_common_size()[0]),
-        static_cast<int>(op->get_common_size()[1]),
-        static_cast<int>(op->get_common_size()[0] * op->get_common_size()[1])};
+    return {as_cuda_type(op->get_const_values()), op->get_num_batch_entries(),
+            op->get_common_size()[1],
+            static_cast<int>(op->get_common_size()[0]),
+            static_cast<int>(op->get_common_size()[1])};
 }
 
 /**
@@ -83,13 +80,10 @@ template <typename ValueType>
 inline gko::batch_multi_vector::UniformBatch<cuda_type<ValueType>>
 get_batch_struct(BatchMultiVector<ValueType>* const op)
 {
-    return {
-        as_cuda_type(op->get_values()),
-        op->get_num_batch_entries(),
-        op->get_common_size()[1],
-        static_cast<int>(op->get_common_size()[0]),
-        static_cast<int>(op->get_common_size()[1]),
-        static_cast<int>(op->get_common_size()[0] * op->get_common_size()[1])};
+    return {as_cuda_type(op->get_values()), op->get_num_batch_entries(),
+            op->get_common_size()[1],
+            static_cast<int>(op->get_common_size()[0]),
+            static_cast<int>(op->get_common_size()[1])};
 }
 
 
diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp
index c921e55d857..d796cdcdb37 100644
--- a/hip/base/batch_struct.hip.hpp
+++ b/hip/base/batch_struct.hip.hpp
@@ -67,13 +67,10 @@ template <typename ValueType>
 inline gko::batch_multi_vector::UniformBatch<const hip_type<ValueType>>
 get_batch_struct(const BatchMultiVector<ValueType>* const op)
 {
-    return {
-        as_hip_type(op->get_const_values()),
-        op->get_num_batch_entries(),
-        op->get_common_size()[1],
-        static_cast<int>(op->get_common_size()[0]),
-        static_cast<int>(op->get_common_size()[1]),
-        static_cast<int>(op->get_common_size()[0] * op->get_common_size()[1])};
+    return {as_hip_type(op->get_const_values()), op->get_num_batch_entries(),
+            op->get_common_size()[1],
+            static_cast<int>(op->get_common_size()[0]),
+            static_cast<int>(op->get_common_size()[1])};
 }
 
 /**
@@ -83,13 +80,10 @@ template <typename ValueType>
 inline gko::batch_multi_vector::UniformBatch<hip_type<ValueType>>
 get_batch_struct(BatchMultiVector<ValueType>* const op)
 {
-    return {
-        as_hip_type(op->get_values()),
-        op->get_num_batch_entries(),
-        op->get_common_size()[1],
-        static_cast<int>(op->get_common_size()[0]),
-        static_cast<int>(op->get_common_size()[1]),
-        static_cast<int>(op->get_common_size()[0] * op->get_common_size()[1])};
+    return {as_hip_type(op->get_values()), op->get_num_batch_entries(),
+            op->get_common_size()[1],
+            static_cast<int>(op->get_common_size()[0]),
+            static_cast<int>(op->get_common_size()[1])};
 }
 
 
diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp
index bb492488b28..056bb575f8a 100644
--- a/reference/base/batch_struct.hpp
+++ b/reference/base/batch_struct.hpp
@@ -66,13 +66,10 @@ template <typename ValueType>
 inline gko::batch_multi_vector::UniformBatch<const ValueType> get_batch_struct(
     const BatchMultiVector<ValueType>* const op)
 {
-    return {
-        op->get_const_values(),
-        op->get_num_batch_entries(),
-        op->get_common_size()[1],
-        static_cast<int>(op->get_common_size()[0]),
-        static_cast<int>(op->get_common_size()[1]),
-        static_cast<int>(op->get_common_size()[0] * op->get_common_size()[1])};
+    return {op->get_const_values(), op->get_num_batch_entries(),
+            op->get_common_size()[1],
+            static_cast<int>(op->get_common_size()[0]),
+
static_cast(op->get_common_size()[1])}; } @@ -83,13 +80,10 @@ template inline gko::batch_multi_vector::UniformBatch get_batch_struct( BatchMultiVector* const op) { - return { - op->get_values(), - op->get_num_batch_entries(), - op->get_common_size()[1], - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0] * op->get_common_size()[1])}; + return {op->get_values(), op->get_num_batch_entries(), + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } From b4802c278c249e6b38741f99db779aa967c876c7 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 14 Jul 2023 13:54:50 +0200 Subject: [PATCH 114/583] Rename cuda/hip kernels --- .../base/batch_multi_vector_kernels.hpp.inc | 151 +++++++++++++----- cuda/base/batch_multi_vector_kernels.cu | 51 ++---- hip/base/batch_multi_vector_kernels.hip.cpp | 64 +++----- 3 files changed, 144 insertions(+), 122 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index 0ef0408674a..6e9dc57681a 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -36,8 +36,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * is in global memory or shared memory) */ template -__device__ __forceinline__ void scale(const BatchEntry& alpha, - const BatchEntry& x) +__device__ __forceinline__ void scale( + const gko::batch_multi_vector::BatchEntry& alpha, + const gko::batch_multi_vector::BatchEntry& x) { const int max_li = x.num_rows * x.num_rhs; for (int li = threadIdx.x; li < max_li; li += blockDim.x) { @@ -55,11 +56,12 @@ __device__ __forceinline__ void scale(const BatchEntry& alpha, } template -__global__ __launch_bounds__(default_block_size, sm_multiplier) void scale( - const gko::batch_dense::UniformBatch alpha, - const gko::batch_dense::UniformBatch x) +__global__ + __launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( + const gko::batch_multi_vector::UniformBatch alpha, + const gko::batch_multi_vector::UniformBatch x) { - for (size_type ibatch = blockIdx.x; ibatch < x.num_batch; + for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; ibatch += gridDim.x) { const auto alpha_b = gko::batch::batch_entry(alpha, ibatch); const auto x_b = gko::batch::batch_entry(x, ibatch); @@ -70,9 +72,9 @@ __global__ __launch_bounds__(default_block_size, sm_multiplier) void scale( template __device__ __forceinline__ void add_scaled( - const gko::batch_dense::BatchEntry& alpha, - const gko::batch_dense::BatchEntry& x, - const gko::batch_dense::BatchEntry& y) + const gko::batch_multi_vector::BatchEntry& alpha, + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& y) { const int max_li = x.num_rows * x.num_rhs; for (int li = threadIdx.x; li < max_li; li += blockDim.x) { @@ -90,12 +92,13 @@ __device__ __forceinline__ void add_scaled( } template -__global__ __launch_bounds__(default_block_size, sm_multiplier) void add_scaled( - const gko::batch_dense::UniformBatch alpha, - const gko::batch_dense::UniformBatch x, - const gko::batch_dense::UniformBatch y) +__global__ + __launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( + const gko::batch_multi_vector::UniformBatch alpha, + const gko::batch_multi_vector::UniformBatch x, + const gko::batch_multi_vector::UniformBatch y) { - for (size_type ibatch = 
blockIdx.x; ibatch < x.num_batch; + for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; ibatch += gridDim.x) { const auto alpha_b = gko::batch::batch_entry(alpha, ibatch); const auto x_b = gko::batch::batch_entry(x, ibatch); @@ -105,6 +108,34 @@ __global__ __launch_bounds__(default_block_size, sm_multiplier) void add_scaled( } +template +__device__ __forceinline__ void one_dot( + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& y, + const int rhs_index, + const gko::batch_multi_vector::BatchEntry& result, + group::thread_block_tile& subwarp_grp) +{ + ValueType val = zero(); + + for (int r = subwarp_grp.thread_rank(); r < x.num_rows; + r += subwarp_grp.size()) { + val += conj(x.values[r * x.stride + rhs_index]) * + y.values[r * y.stride + rhs_index]; + } + + // subwarp_grp level reduction +#pragma unroll + for (int j = config::warp_size / 2; j > 0; j /= 2) { + val += subwarp_grp.shfl_down(val, j); + } + + if (subwarp_grp.thread_rank() == 0) { + result.values[rhs_index] = val; + } +} + + /** * Computes the dot product of some column vectors in global or shared memory. * @@ -112,8 +143,9 @@ __global__ __launch_bounds__(default_block_size, sm_multiplier) void add_scaled( */ template __device__ __forceinline__ void compute_dot_product( - const BatchEntry& x, const BatchEntry& y, - const BatchEntry& result) + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& y, + const gko::batch_multi_vector::BatchEntry& result) { constexpr auto tile_size = config::warp_size; auto thread_block = group::this_thread_block(); @@ -131,17 +163,23 @@ __device__ __forceinline__ void compute_dot_product( template __global__ __launch_bounds__( default_block_size, - sm_multiplier) void compute_dot_product(const gko::batch_dense:: - UniformBatch - x, - const gko::batch_dense:: - UniformBatch - y, - const gko::batch_dense:: - UniformBatch - result) + sm_multiplier) void compute_dot_product_kernel(const gko:: + batch_multi_vector:: + UniformBatch< + const ValueType> + x, + const gko:: + batch_multi_vector:: + UniformBatch< + const ValueType> + y, + const gko:: + batch_multi_vector:: + UniformBatch< + ValueType> + result) { - for (size_type ibatch = blockIdx.x; ibatch < x.num_batch; + for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; ibatch += gridDim.x) { const auto x_b = gko::batch::batch_entry(x, ibatch); const auto y_b = gko::batch::batch_entry(y, ibatch); @@ -151,6 +189,34 @@ __global__ __launch_bounds__( } +template +__device__ __forceinline__ void one_norm2( + const gko::batch_multi_vector::BatchEntry& x, + const int rhs_index, + const gko::batch_multi_vector::BatchEntry>& + result, + group::thread_block_tile& subwarp_grp) +{ + using real_type = typename gko::remove_complex; + real_type val = zero(); + + for (int r = subwarp_grp.thread_rank(); r < x.num_rows; + r += subwarp_grp.size()) { + val += squared_norm(x.values[r * x.stride + rhs_index]); + } + + // subwarp_grp level reduction +#pragma unroll + for (int j = config::warp_size / 2; j > 0; j /= 2) { + val += subwarp_grp.shfl_down(val, j); + } + + if (subwarp_grp.thread_rank() == 0) { + result.values[rhs_index] = sqrt(val); + } +} + + /** * Computes the 2-norms of some column vectors in global or shared memory. 
* @@ -159,8 +225,9 @@ __global__ __launch_bounds__( */ template __device__ __forceinline__ void compute_norm2( - const gko::batch_dense::BatchEntry& x, - const gko::batch_dense::BatchEntry>& result) + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry>& + result) { constexpr auto tile_size = config::warp_size; auto thread_block = group::this_thread_block(); @@ -176,12 +243,17 @@ __device__ __forceinline__ void compute_norm2( template -__global__ - __launch_bounds__(default_block_size, sm_multiplier) void compute_norm2( - const gko::batch_dense::UniformBatch x, - const gko::batch_dense::UniformBatch> result) +__global__ __launch_bounds__( + default_block_size, + sm_multiplier) void compute_norm2_kernel(const gko::batch_multi_vector:: + UniformBatch + x, + const gko::batch_multi_vector:: + UniformBatch< + remove_complex> + result) { - for (size_type ibatch = blockIdx.x; ibatch < x.num_batch; + for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; ibatch += gridDim.x) { const auto x_b = gko::batch::batch_entry(x, ibatch); const auto r_b = gko::batch::batch_entry(result, ibatch); @@ -198,8 +270,8 @@ __global__ */ template __device__ __forceinline__ void copy( - const gko::batch_dense::BatchEntry& in, - const gko::batch_dense::BatchEntry& out) + const gko::batch_multi_vector::BatchEntry& in, + const gko::batch_multi_vector::BatchEntry& out) { for (int iz = threadIdx.x; iz < in.num_rows * in.num_rhs; iz += blockDim.x) { @@ -211,11 +283,12 @@ __device__ __forceinline__ void copy( template -__global__ __launch_bounds__(default_block_size, sm_multiplier) void copy( - const gko::batch_dense::UniformBatch src, - const gko::batch_dense::UniformBatch dst) +__global__ + __launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( + const gko::batch_multi_vector::UniformBatch src, + const gko::batch_multi_vector::UniformBatch dst) { - for (size_type ibatch = blockIdx.x; ibatch < src.num_batch; + for (size_type ibatch = blockIdx.x; ibatch < src.num_batch_entries; ibatch += gridDim.x) { const auto dst_b = gko::batch::batch_entry(dst, ibatch); const auto src_b = gko::batch::batch_entry(src, ibatch); diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index df5aa9149a5..8bfb6fc0167 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -74,7 +74,7 @@ void scale(std::shared_ptr exec, const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto alpha_ub = get_batch_struct(alpha); const auto x_ub = get_batch_struct(x); - scale<<>>(alpha_ub, x_ub); + scale_kernel<<>>(alpha_ub, x_ub); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -89,18 +89,10 @@ void add_scaled(std::shared_ptr exec, { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const size_type nrhs = x->get_common_size()[1]; - if (nrhs == 1) { - const auto num_batch = x->get_num_batch_entries(); - const auto num_rows = x->get_common_size()[0]; - single_add_scaled<<>>( - num_batch, num_rows, as_cuda_type(alpha->get_const_values()), - as_cuda_type(x->get_const_values()), as_cuda_type(y->get_values())); - } else { - const auto alpha_ub = get_batch_struct(alpha); - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - add_scaled<<>>(alpha_ub, x_ub, y_ub); - } + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + add_scaled_kernel<<>>(alpha_ub, x_ub, y_ub); } 
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -115,19 +107,11 @@ void compute_dot(std::shared_ptr exec, { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_common_size()[1]; - if (num_rhs == 1) { - const auto num_rows = x->get_common_size()[0]; - single_compute_dot_product<<>>( - num_blocks, num_rows, as_cuda_type(x->get_const_values()), - as_cuda_type(y->get_const_values()), - as_cuda_type(result->get_values())); - } else { - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - const auto res_ub = get_batch_struct(result); - compute_dot_product<<>>(x_ub, y_ub, - res_ub); - } + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + const auto res_ub = get_batch_struct(result); + compute_dot_product_kernel<<>>(x_ub, y_ub, + res_ub); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -141,16 +125,9 @@ void compute_norm2(std::shared_ptr exec, { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_common_size()[1]; - if (num_rhs == 1) { - const auto num_rows = x->get_common_size()[0]; - single_compute_norm2<<>>( - num_blocks, num_rows, as_cuda_type(x->get_const_values()), - as_cuda_type(result->get_values())); - } else { - const auto x_ub = get_batch_struct(x); - const auto res_ub = get_batch_struct(result); - compute_norm2<<>>(x_ub, res_ub); - } + const auto x_ub = get_batch_struct(x); + const auto res_ub = get_batch_struct(result); + compute_norm2_kernel<<>>(x_ub, res_ub); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -165,7 +142,7 @@ void copy(std::shared_ptr exec, const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto result_ub = get_batch_struct(result); const auto x_ub = get_batch_struct(x); - copy<<>>(x_ub, result_ub); + copy_kernel<<>>(x_ub, result_ub); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); diff --git a/hip/base/batch_multi_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp index 2a6c3085772..50f8593ffec 100644 --- a/hip/base/batch_multi_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -77,8 +77,8 @@ void scale(std::shared_ptr exec, const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto alpha_ub = get_batch_struct(alpha); const auto x_ub = get_batch_struct(x); - hipLaunchKernelGGL(scale, dim3(num_blocks), dim3(default_block_size), 0, 0, - alpha_ub, x_ub); + hipLaunchKernelGGL(scale_kernel, dim3(num_blocks), dim3(default_block_size), + 0, 0, alpha_ub, x_ub); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -93,21 +93,11 @@ void add_scaled(std::shared_ptr exec, { const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const size_type nrhs = x->get_common_size()[1]; - if (nrhs == 1) { - const auto num_batch = x->get_num_batch_entries(); - const auto num_rows = x->get_common_size()[0]; - hipLaunchKernelGGL( - single_add_scaled, dim3(num_blocks), dim3(default_block_size), 0, 0, - num_batch, num_rows, as_hip_type(alpha->get_const_values()), - as_hip_type(x->get_const_values()), as_hip_type(y->get_values())); - } else { - const auto alpha_ub = get_batch_struct(alpha); - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - hipLaunchKernelGGL(add_scaled, dim3(num_blocks), - dim3(default_block_size), 0, 0, alpha_ub, x_ub, - y_ub); - } + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + hipLaunchKernelGGL(add_scaled_kernel, dim3(num_blocks), + 
dim3(default_block_size), 0, 0, alpha_ub, x_ub, y_ub); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -122,23 +112,13 @@ void compute_dot(std::shared_ptr exec, { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_common_size()[1]; - if (num_rhs == 1) { - const auto num_rows = x->get_common_size()[0]; - hipLaunchKernelGGL(single_compute_dot_product, dim3(num_blocks), - dim3(default_block_size), 0, 0, num_blocks, num_rows, - as_hip_type(x->get_const_values()), - as_hip_type(y->get_const_values()), - as_hip_type(result->get_values())); - } else { - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - const auto res_ub = get_batch_struct(result); - hipLaunchKernelGGL(compute_dot_product, dim3(num_blocks), - dim3(default_block_size), 0, 0, x_ub, y_ub, res_ub); - } + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + const auto res_ub = get_batch_struct(result); + hipLaunchKernelGGL(compute_dot_product_kernel, dim3(num_blocks), + dim3(default_block_size), 0, 0, x_ub, y_ub, res_ub); } - GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); @@ -150,18 +130,10 @@ void compute_norm2(std::shared_ptr exec, { const auto num_blocks = x->get_num_batch_entries(); const auto num_rhs = x->get_common_size()[1]; - if (num_rhs == 1) { - const auto num_rows = x->get_common_size()[0]; - hipLaunchKernelGGL(single_compute_norm2, dim3(num_blocks), - dim3(default_block_size), 0, 0, num_blocks, num_rows, - as_hip_type(x->get_const_values()), - as_hip_type(result->get_values())); - } else { - const auto x_ub = get_batch_struct(x); - const auto res_ub = get_batch_struct(result); - hipLaunchKernelGGL(compute_norm2, dim3(num_blocks), - dim3(default_block_size), 0, 0, x_ub, res_ub); - } + const auto x_ub = get_batch_struct(x); + const auto res_ub = get_batch_struct(result); + hipLaunchKernelGGL(compute_norm2_kernel, dim3(num_blocks), + dim3(default_block_size), 0, 0, x_ub, res_ub); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -176,8 +148,8 @@ void copy(std::shared_ptr exec, const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; const auto result_ub = get_batch_struct(result); const auto x_ub = get_batch_struct(x); - hipLaunchKernelGGL(copy, dim3(num_blocks), dim3(default_block_size), 0, 0, - x_ub, result_ub); + hipLaunchKernelGGL(copy_kernel, dim3(num_blocks), dim3(default_block_size), + 0, 0, x_ub, result_ub); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); From f40f79271c7edb9962c246e56b27749f52fa6f46 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 14 Jul 2023 13:55:06 +0200 Subject: [PATCH 115/583] Update and fix dpcpp kernels Co-authored-by: Phuong Nguyen --- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 2 +- dpcpp/base/batch_multi_vector_kernels.hpp.inc | 115 +++++++----------- 2 files changed, 43 insertions(+), 74 deletions(-) diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 6101ed3da4d..88cdb1d6e6f 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -83,7 +83,7 @@ void scale(std::shared_ptr exec, auto group_id = group.get_group_linear_id(); const auto alpha_b = batch::batch_entry(alpha_ub, group_id); const auto x_b = batch::batch_entry(x_ub, group_id); - single_scale_kernel(alpha_b, x_b, item_ct1); + scale_kernel(alpha_b, x_b, item_ct1); }); }); } diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc 
b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index 7ea25fb4c22..07d6d97ff0a 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -30,25 +30,30 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -/** - * Copies the values of vector into another. - * - * @param num_rows Length of vector. - * @param in Vector to copy from. - * @param out Vector to copy into. - */ + template -__dpct_inline__ void copy_kernel(const int num_rows, - const ValueType* const __restrict__ in, - ValueType* const __restrict__ out, - sycl::nd_item<3> item_ct1) +__dpct_inline__ void scale_kernel( + const gko::batch_dense::BatchEntry& alpha, + const gko::batch_dense::BatchEntry& x, + sycl::nd_item<3>& item_ct1) { - for (int iz = item_ct1.get_local_linear_id(); iz < num_rows; - iz += item_ct1.get_local_range().size()) { - out[iz] = in[iz]; + const int max_li = x.num_rows * x.num_rhs; + for (int li = item_ct1.get_local_linear_id(); li < max_li; + li += item_ct1.get_local_range().size()) { + const int row = li / x.num_rhs; + const int col = li % x.num_rhs; + + if (alpha.num_rhs == 1) { + x.values[row * x.stride + col] = + alpha.values[0] * x.values[row * x.stride + col]; + } else { + x.values[row * x.stride + col] = + alpha.values[col] * x.values[row * x.stride + col]; + } } } + /** * Adds a scaled vector to another. * @@ -70,35 +75,26 @@ __dpct_inline__ void add_scaled_kernel(const int num_rows, } } -/** - * Computes the 2-norm of a vector in global or shared memory. - * - * @param x A row-major vector (only 1 column). - * @param result Norm value. - */ + template -__dpct_inline__ void compute_norm2_sg_kernel( +__dpct_inline__ void compute_dot_product_kernel( const int num_rows, const ValueType* const __restrict__ x, - gko::remove_complex& result, sycl::nd_item<3> item_ct1) + const ValueType* const __restrict__ y, ValueType& result, + sycl::nd_item<3> item_ct1) { - const auto sg = item_ct1.get_sub_group(); - const auto sg_size = sg.get_local_range().size(); - const auto sg_tid = sg.get_local_id(); - - using real_type = typename gko::remove_complex; - real_type val = zero(); - - for (int r = sg_tid; r < num_rows; r += sg_size) { - val += squared_norm(x[r]); - } + const auto group = item_ct1.get_group(); + const auto group_size = item_ct1.get_local_range().size(); + const auto tid = item_ct1.get_local_linear_id(); - val = sycl::reduce_over_group(sg, val, sycl::plus<>()); + ValueType val = zero(); - if (sg_tid == 0) { - result = sqrt(val); + for (int r = tid; r < num_rows; r += group_size) { + val += conj(x[r]) * y[r]; } + result = sycl::reduce_over_group(group, val, sycl::plus<>()); } + template __dpct_inline__ void compute_norm2_kernel( const int num_rows, const ValueType* const __restrict__ x, @@ -122,47 +118,20 @@ __dpct_inline__ void compute_norm2_kernel( /** - * Computes the dot product of some column vectors in global or shared memory. + * Copies the values of vector into another. * - * @param result Holds dot product value for vector in x and y. + * @param num_rows Length of vector. + * @param in Vector to copy from. + * @param out Vector to copy into. 
*/ template -__dpct_inline__ void compute_dot_product_sg_kernel( - const int num_rows, const ValueType* const __restrict__ x, - const ValueType* const __restrict__ y, ValueType& result, - sycl::nd_item<3> item_ct1) -{ - const auto sg = item_ct1.get_sub_group(); - const auto sg_size = sg.get_local_range().size(); - const auto sg_tid = sg.get_local_id(); - - ValueType val = zero(); - - for (int r = sg_tid; r < num_rows; r += sg_size) { - val += conj(x[r]) * y[r]; - } - - val = sycl::reduce_over_group(sg, val, sycl::plus<>()); - - if (sg_tid == 0) { - result = val; - } -} - -template -__dpct_inline__ void compute_dot_product_kernel( - const int num_rows, const ValueType* const __restrict__ x, - const ValueType* const __restrict__ y, ValueType& result, - sycl::nd_item<3> item_ct1) +__dpct_inline__ void copy_kernel(const int num_rows, + const ValueType* const __restrict__ in, + ValueType* const __restrict__ out, + sycl::nd_item<3> item_ct1) { - const auto group = item_ct1.get_group(); - const auto group_size = item_ct1.get_local_range().size(); - const auto tid = item_ct1.get_local_linear_id(); - - ValueType val = zero(); - - for (int r = tid; r < num_rows; r += group_size) { - val += conj(x[r]) * y[r]; + for (int iz = item_ct1.get_local_linear_id(); iz < num_rows; + iz += item_ct1.get_local_range().size()) { + out[iz] = in[iz]; } - result = sycl::reduce_over_group(group, val, sycl::plus<>()); } From 0edae740a72fac469d4c57d8ab6a73d80adc79b2 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 14 Jul 2023 13:55:26 +0200 Subject: [PATCH 116/583] Fix omp and ref kernels --- omp/base/batch_multi_vector_kernels.cpp | 14 +- reference/base/batch_multi_vector_kernels.cpp | 10 +- .../base/batch_multi_vector_kernels.hpp.inc | 49 +- .../test/base/batch_multi_vector_kernels.cpp | 641 +----------------- 4 files changed, 55 insertions(+), 659 deletions(-) diff --git a/omp/base/batch_multi_vector_kernels.cpp b/omp/base/batch_multi_vector_kernels.cpp index 6dd8b38e6d8..f46cbb12ead 100644 --- a/omp/base/batch_multi_vector_kernels.cpp +++ b/omp/base/batch_multi_vector_kernels.cpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/components/prefix_sum_kernels.hpp" -#include "reference/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" namespace gko { @@ -56,7 +56,7 @@ namespace omp { namespace batch_multi_vector { -#include "reference/matrix/batch_multi_vector_kernels.hpp.inc" +#include "reference/base/batch_multi_vector_kernels.hpp.inc" template @@ -70,7 +70,7 @@ void scale(std::shared_ptr exec, for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); const auto x_b = gko::batch::batch_entry(x_ub, batch); - scale(alpha_b, x_b); + scale_kernel(alpha_b, x_b); } } @@ -92,7 +92,7 @@ void add_scaled(std::shared_ptr exec, const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); const auto x_b = gko::batch::batch_entry(x_ub, batch); const auto y_b = gko::batch::batch_entry(y_ub, batch); - add_scaled(alpha_b, x_b, y_b); + add_scaled_kernel(alpha_b, x_b, y_b); } } @@ -115,7 +115,7 @@ void compute_dot(std::shared_ptr exec, const auto res_b = gko::batch::batch_entry(res_ub, batch); const auto x_b = gko::batch::batch_entry(x_ub, batch); const auto y_b = gko::batch::batch_entry(y_ub, batch); - compute_dot_product(x_b, y_b, res_b); + compute_dot_product_kernel(x_b, y_b, res_b); } } @@ -135,7 +135,7 @@ void compute_norm2(std::shared_ptr exec, ++batch) { const auto res_b = gko::batch::batch_entry(res_ub, batch); const auto x_b = gko::batch::batch_entry(x_ub, batch); - compute_norm2(x_b, res_b); + compute_norm2_kernel(x_b, res_b); } } @@ -154,7 +154,7 @@ void copy(std::shared_ptr exec, for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { const auto result_b = gko::batch::batch_entry(result_ub, batch); const auto x_b = gko::batch::batch_entry(x_ub, batch); - copy(x_b, result_b); + copy_kernel(x_b, result_b); } } diff --git a/reference/base/batch_multi_vector_kernels.cpp b/reference/base/batch_multi_vector_kernels.cpp index 31e10fbe22f..f494a326773 100644 --- a/reference/base/batch_multi_vector_kernels.cpp +++ b/reference/base/batch_multi_vector_kernels.cpp @@ -69,7 +69,7 @@ void scale(std::shared_ptr exec, for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); const auto x_b = gko::batch::batch_entry(x_ub, batch); - scale(alpha_b, x_b); + scale_kernel(alpha_b, x_b); } } @@ -90,7 +90,7 @@ void add_scaled(std::shared_ptr exec, const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); const auto x_b = gko::batch::batch_entry(x_ub, batch); const auto y_b = gko::batch::batch_entry(y_ub, batch); - add_scaled(alpha_b, x_b, y_b); + add_scaled_kernel(alpha_b, x_b, y_b); } } @@ -112,7 +112,7 @@ void compute_dot(std::shared_ptr exec, const auto res_b = gko::batch::batch_entry(res_ub, batch); const auto x_b = gko::batch::batch_entry(x_ub, batch); const auto y_b = gko::batch::batch_entry(y_ub, batch); - compute_dot_product(x_b, y_b, res_b); + compute_dot_product_kernel(x_b, y_b, res_b); } } @@ -131,7 +131,7 @@ void compute_norm2(std::shared_ptr exec, ++batch) { const auto res_b = gko::batch::batch_entry(res_ub, batch); const auto x_b = gko::batch::batch_entry(x_ub, batch); - compute_norm2(x_b, res_b); + compute_norm2_kernel(x_b, res_b); } } @@ -149,7 +149,7 @@ void copy(std::shared_ptr exec, for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { const auto result_b = gko::batch::batch_entry(result_ub, batch); const auto x_b = gko::batch::batch_entry(x_ub, batch); - copy(x_b, result_b); + copy_kernel(x_b, 
result_b); } } diff --git a/reference/base/batch_multi_vector_kernels.hpp.inc b/reference/base/batch_multi_vector_kernels.hpp.inc index 3cda19cfc06..a793fe030f9 100644 --- a/reference/base/batch_multi_vector_kernels.hpp.inc +++ b/reference/base/batch_multi_vector_kernels.hpp.inc @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template -inline void scale( +inline void scale_kernel( const gko::batch_multi_vector::BatchEntry& alpha, const gko::batch_multi_vector::BatchEntry& x) { @@ -53,7 +53,7 @@ inline void scale( template -inline void add_scaled( +inline void add_scaled_kernel( const gko::batch_multi_vector::BatchEntry& alpha, const gko::batch_multi_vector::BatchEntry& x, const gko::batch_multi_vector::BatchEntry& y) @@ -77,7 +77,26 @@ inline void add_scaled( template -inline void compute_norm2( +inline void compute_dot_product_kernel( + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& y, + const gko::batch_multi_vector::BatchEntry& result) +{ + for (int c = 0; c < result.num_rhs; c++) { + result.values[c] = gko::zero(); + } + + for (int r = 0; r < x.num_rows; r++) { + for (int c = 0; c < x.num_rhs; c++) { + result.values[c] += + conj(x.values[r * x.stride + c]) * y.values[r * y.stride + c]; + } + } +} + + +template +inline void compute_norm2_kernel( const gko::batch_multi_vector::BatchEntry& x, const gko::batch_multi_vector::BatchEntry>& result) @@ -103,8 +122,9 @@ inline void compute_norm2( * and stride set. */ template -inline void copy(const gko::batch_multi_vector::BatchEntry& in, - const gko::batch_multi_vector::BatchEntry& out) +inline void copy_kernel( + const gko::batch_multi_vector::BatchEntry& in, + const gko::batch_multi_vector::BatchEntry& out) { for (int iz = 0; iz < in.num_rows * in.num_rhs; iz++) { const int i = iz / in.num_rhs; @@ -112,22 +132,3 @@ inline void copy(const gko::batch_multi_vector::BatchEntry& in, out.values[i * out.stride + j] = in.values[i * in.stride + j]; } } - - -template -inline void compute_dot_product( - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& y, - const gko::batch_multi_vector::BatchEntry& result) -{ - for (int c = 0; c < result.num_rhs; c++) { - result.values[c] = gko::zero(); - } - - for (int r = 0; r < x.num_rows; r++) { - for (int c = 0; c < x.num_rhs; c++) { - result.values[c] += - conj(x.values[r * x.stride + c]) * y.values[r * y.stride + c]; - } - } -} diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index f2062a4e393..8ed8f03dc25 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -141,102 +141,6 @@ class BatchMultiVector : public ::testing::Test { TYPED_TEST_SUITE(BatchMultiVector, gko::test::ValueTypes); -TYPED_TEST(BatchMultiVector, AppliesToBatchMultiVector) -{ - using T = typename TestFixture::value_type; - this->mtx_1->apply(this->mtx_2.get(), this->mtx_3.get()); - this->mtx_10->apply(this->mtx_20.get(), this->mtx_30.get()); - this->mtx_11->apply(this->mtx_21.get(), this->mtx_31.get()); - - - auto res = this->mtx_3->unbatch(); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_30.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_31.get(), 0.); -} - - -TYPED_TEST(BatchMultiVector, AppliesLinearCombinationToBatchMultiVector) -{ - using Mtx = typename TestFixture::Mtx; - using DenseMtx = typename TestFixture::DenseMtx; - using T = typename TestFixture::value_type; - 
auto alpha = gko::batch_initialize({{1.5}, {-1.0}}, this->exec); - auto beta = gko::batch_initialize({{2.5}, {-4.0}}, this->exec); - auto alpha0 = gko::initialize({1.5}, this->exec); - auto alpha1 = gko::initialize({-1.0}, this->exec); - auto beta0 = gko::initialize({2.5}, this->exec); - auto beta1 = gko::initialize({-4.0}, this->exec); - - this->mtx_1->apply(alpha.get(), this->mtx_2.get(), beta.get(), - this->mtx_3.get()); - this->mtx_10->apply(alpha0.get(), this->mtx_20.get(), beta0.get(), - this->mtx_30.get()); - this->mtx_11->apply(alpha1.get(), this->mtx_21.get(), beta1.get(), - this->mtx_31.get()); - - auto res = this->mtx_3->unbatch(); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_30.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_31.get(), 0.); -} - - -TYPED_TEST(BatchMultiVector, ApplyFailsOnWrongInnerDimension) -{ - using Mtx = typename TestFixture::Mtx; - auto res = Mtx::create( - this->exec, std::vector>{gko::dim<2>{2}, gko::dim<2>{2}}); - - ASSERT_THROW(this->mtx_2->apply(this->mtx_1.get(), res.get()), - gko::DimensionMismatch); -} - - -TYPED_TEST(BatchMultiVector, ApplyFailsForNonUniformBatches) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto mat1 = gko::batch_initialize( - std::vector{4, 4}, - {{I({1.0, -1.0}), I({1.0, -1.0}), I({2.0, -0.5})}, - {{1.0, 2.5, 3.0}, {1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, - this->exec); - auto mat2 = gko::batch_initialize( - std::vector{4, 4}, - {{{1.0, -1.0, 2.2}, {-2.0, 2.0, -0.5}}, - {{1.0, 2.5, -3.0}, {1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, - this->exec); - auto res = Mtx::create( - this->exec, std::vector>{gko::dim<2>{2}, gko::dim<2>{3}}); - - ASSERT_THROW(mat2->apply(mat1.get(), res.get()), gko::NotImplemented); -} - - -TYPED_TEST(BatchMultiVector, ApplyFailsOnWrongNumberOfRows) -{ - using Mtx = typename TestFixture::Mtx; - auto res = Mtx::create( - this->exec, std::vector>{gko::dim<2>{3}, gko::dim<2>{3}}); - - ASSERT_THROW(this->mtx_1->apply(this->mtx_2.get(), res.get()), - gko::DimensionMismatch); -} - - -TYPED_TEST(BatchMultiVector, ApplyFailsOnWrongNumberOfCols) -{ - using Mtx = typename TestFixture::Mtx; - auto res = Mtx::create( - this->exec, - std::vector>{gko::dim<2>{2, 1}, gko::dim<2>{2, 1}}, - std::vector{3, 3}); - - - ASSERT_THROW(this->mtx_1->apply(this->mtx_2.get(), res.get()), - gko::DimensionMismatch); -} - - TYPED_TEST(BatchMultiVector, ScalesData) { using Mtx = typename TestFixture::Mtx; @@ -313,71 +217,6 @@ TYPED_TEST(BatchMultiVector, AddsScaled) } -TYPED_TEST(BatchMultiVector, AddsScale) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto alpha = gko::batch_initialize( - {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto beta = gko::batch_initialize( - {{{-1.0, 3.0, 0.5}}, {{1.5, 0.5, -4.0}}}, this->exec); - - auto ualpha = alpha->unbatch(); - auto ubeta = beta->unbatch(); - - this->mtx_1->add_scale(alpha.get(), this->mtx_0.get(), beta.get()); - this->mtx_10->add_scale(ualpha[0].get(), this->mtx_00.get(), - ubeta[0].get()); - this->mtx_11->add_scale(ualpha[1].get(), this->mtx_01.get(), - ubeta[1].get()); - - auto res = this->mtx_1->unbatch(); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); -} - - -TYPED_TEST(BatchMultiVector, ConvergenceAddScaled) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto alpha = gko::batch_initialize( - {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - - auto 
ualpha = alpha->unbatch(); - - - const int num_rhs = 3; - const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - - gko::kernels::reference::batch_multi_vector::convergence_add_scaled( - this->exec, alpha.get(), this->mtx_0.get(), this->mtx_1.get(), - converged); - - auto mtx_10_clone = gko::clone(this->mtx_10); - auto mtx_11_clone = gko::clone(this->mtx_11); - - this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); - this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - - auto res = this->mtx_1->unbatch(); - - EXPECT_EQ(res[0]->at(0, 0), mtx_10_clone->at(0, 0)); - EXPECT_EQ(res[0]->at(1, 0), mtx_10_clone->at(1, 0)); - EXPECT_EQ(res[0]->at(0, 1), this->mtx_10->at(0, 1)); - EXPECT_EQ(res[0]->at(1, 1), this->mtx_10->at(1, 1)); - EXPECT_EQ(res[0]->at(0, 2), mtx_10_clone->at(0, 2)); - EXPECT_EQ(res[0]->at(1, 2), mtx_10_clone->at(1, 2)); - - EXPECT_EQ(res[1]->at(0, 0), mtx_11_clone->at(0, 0)); - EXPECT_EQ(res[1]->at(1, 0), mtx_11_clone->at(1, 0)); - EXPECT_EQ(res[1]->at(0, 1), this->mtx_11->at(0, 1)); - EXPECT_EQ(res[1]->at(1, 1), this->mtx_11->at(1, 1)); - EXPECT_EQ(res[1]->at(0, 2), mtx_11_clone->at(0, 2)); - EXPECT_EQ(res[1]->at(1, 2), mtx_11_clone->at(1, 2)); -} - - TYPED_TEST(BatchMultiVector, AddsScaledWithScalar) { using Mtx = typename TestFixture::Mtx; @@ -396,91 +235,6 @@ TYPED_TEST(BatchMultiVector, AddsScaledWithScalar) } -TYPED_TEST(BatchMultiVector, AddsScaleWithScalar) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); - auto beta = gko::batch_initialize({{-0.5}, {3.0}}, this->exec); - - auto ualpha = alpha->unbatch(); - auto ubeta = beta->unbatch(); - - this->mtx_1->add_scale(alpha.get(), this->mtx_0.get(), beta.get()); - this->mtx_10->add_scale(ualpha[0].get(), this->mtx_00.get(), - ubeta[0].get()); - this->mtx_11->add_scale(ualpha[1].get(), this->mtx_01.get(), - ubeta[1].get()); - - auto res = this->mtx_1->unbatch(); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); -} - - -TYPED_TEST(BatchMultiVector, AddScaleWithScalarViaApply) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); - auto beta = gko::batch_initialize({{-0.5}, {3.0}}, this->exec); - auto id = gko::matrix::BatchIdentity::create( - this->exec, gko::batch_dim<2>(2, gko::dim<2>(3, 3))); - auto ualpha = alpha->unbatch(); - auto ubeta = beta->unbatch(); - - this->mtx_0->apply(alpha.get(), id.get(), beta.get(), this->mtx_1.get()); - this->mtx_10->add_scale(ualpha[0].get(), this->mtx_00.get(), - ubeta[0].get()); - this->mtx_11->add_scale(ualpha[1].get(), this->mtx_01.get(), - ubeta[1].get()); - - auto res = this->mtx_1->unbatch(); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); -} - - -TYPED_TEST(BatchMultiVector, ConvergenceAddScaledWithScalar) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); - - auto ualpha = alpha->unbatch(); - - - const int num_rhs = 3; - const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - - gko::kernels::reference::batch_multi_vector::convergence_add_scaled( - this->exec, alpha.get(), this->mtx_0.get(), this->mtx_1.get(), - converged); - - auto mtx_10_clone = gko::clone(this->mtx_10); - auto 
mtx_11_clone = gko::clone(this->mtx_11); - - this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); - this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - - auto res = this->mtx_1->unbatch(); - - EXPECT_EQ(res[0]->at(0, 0), mtx_10_clone->at(0, 0)); - EXPECT_EQ(res[0]->at(1, 0), mtx_10_clone->at(1, 0)); - EXPECT_EQ(res[0]->at(0, 1), this->mtx_10->at(0, 1)); - EXPECT_EQ(res[0]->at(1, 1), this->mtx_10->at(1, 1)); - EXPECT_EQ(res[0]->at(0, 2), mtx_10_clone->at(0, 2)); - EXPECT_EQ(res[0]->at(1, 2), mtx_10_clone->at(1, 2)); - - EXPECT_EQ(res[1]->at(0, 0), mtx_11_clone->at(0, 0)); - EXPECT_EQ(res[1]->at(1, 0), mtx_11_clone->at(1, 0)); - EXPECT_EQ(res[1]->at(0, 1), this->mtx_11->at(0, 1)); - EXPECT_EQ(res[1]->at(1, 1), this->mtx_11->at(1, 1)); - EXPECT_EQ(res[1]->at(0, 2), mtx_11_clone->at(0, 2)); - EXPECT_EQ(res[1]->at(1, 2), mtx_11_clone->at(1, 2)); -} - - TYPED_TEST(BatchMultiVector, AddScaledFailsOnWrongSizes) { using Mtx = typename TestFixture::Mtx; @@ -492,18 +246,6 @@ TYPED_TEST(BatchMultiVector, AddScaledFailsOnWrongSizes) } -TYPED_TEST(BatchMultiVector, AddScaleFailsOnWrongSizes) -{ - using Mtx = typename TestFixture::Mtx; - auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); - auto beta = gko::batch_initialize({{2.0}, {3.0}}, this->exec); - - ASSERT_THROW( - this->mtx_1->add_scale(alpha.get(), this->mtx_2.get(), beta.get()), - gko::DimensionMismatch); -} - - TYPED_TEST(BatchMultiVector, AddScaleFailsOnWrongScalarSizes) { using Mtx = typename TestFixture::Mtx; @@ -536,72 +278,35 @@ TYPED_TEST(BatchMultiVector, ComputesDot) } -TYPED_TEST(BatchMultiVector, ConvergenceComputeDot) +TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; auto result = - Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - - for (int ibatch = 0; ibatch < result->get_size().get_batch_sizes().size(); - ibatch++) { - for (int icol = 0; icol < result->get_size().at()[1]; icol++) { - result->at(ibatch, 0, icol) = gko::zero(); - } - } - - auto ures = result->unbatch(); - - const int num_rhs = 3; - const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - - gko::kernels::reference::batch_multi_vector::convergence_compute_dot( - this->exec, this->mtx_0.get(), this->mtx_1.get(), result.get(), - converged); - - auto ures_00_clone = gko::clone(ures[0]); - auto ures_01_clone = gko::clone(ures[1]); - - this->mtx_00->compute_dot(this->mtx_10.get(), ures[0].get()); - this->mtx_01->compute_dot(this->mtx_11.get(), ures[1].get()); - - auto res = result->unbatch(); - - EXPECT_EQ(res[0]->at(0, 0), ures_00_clone->at(0, 0)); - EXPECT_EQ(res[0]->at(0, 1), ures[0]->at(0, 1)); - EXPECT_EQ(res[0]->at(0, 2), ures_00_clone->at(0, 2)); + Mtx::create(this->exec, gko::batch_dim<2>(std::vector>{ + gko::dim<2>{1, 2}, gko::dim<2>{1, 3}})); - EXPECT_EQ(res[1]->at(0, 0), ures_01_clone->at(0, 0)); - EXPECT_EQ(res[1]->at(0, 1), ures[1]->at(0, 1)); - EXPECT_EQ(res[1]->at(0, 2), ures_01_clone->at(0, 2)); + ASSERT_THROW(this->mtx_1->compute_dot(this->mtx_2.get(), result.get()), + gko::DimensionMismatch); } -TYPED_TEST(BatchMultiVector, ComputesNorm2) +TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongResultSize) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using T_nc = gko::remove_complex; - using NormVector = gko::BatchMultiVector; - auto mtx(gko::batch_initialize( - {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, - {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, - 
this->exec)); - auto batch_size = gko::batch_dim<2>( - std::vector>{gko::dim<2>{1, 2}, gko::dim<2>{1, 2}}); auto result = - NormVector::create(this->exec, batch_size, gko::batch_stride(2, 2)); - - mtx->compute_norm2(result.get()); + Mtx::create(this->exec, gko::batch_dim<2>(std::vector>{ + gko::dim<2>{1, 2}, gko::dim<2>{1, 2}})); + auto result2 = + Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); - EXPECT_EQ(result->at(0, 0, 0), T_nc{3.0}); - EXPECT_EQ(result->at(0, 0, 1), T_nc{5.0}); - EXPECT_EQ(result->at(1, 0, 0), T_nc{5.0}); - EXPECT_EQ(result->at(1, 0, 1), T_nc{3.0}); + ASSERT_THROW(this->mtx_0->compute_dot(this->mtx_1.get(), result.get()), + gko::DimensionMismatch); + ASSERT_THROW(this->mtx_0->compute_dot(this->mtx_1.get(), result2.get()), + gko::DimensionMismatch); } -TYPED_TEST(BatchMultiVector, ConvergenceComputeNorm2) +TYPED_TEST(BatchMultiVector, ComputesNorm2) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -616,57 +321,15 @@ TYPED_TEST(BatchMultiVector, ConvergenceComputeNorm2) auto result = NormVector::create(this->exec, batch_size, gko::batch_stride(2, 2)); - for (int ibatch = 0; ibatch < result->get_size().get_batch_sizes().size(); - ibatch++) { - for (int icol = 0; icol < result->get_size().at()[1]; icol++) { - result->at(ibatch, 0, icol) = gko::zero(); - } - } - - auto result_clone = gko::clone(result); - - const int num_rhs = 2; - const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - - gko::kernels::reference::batch_multi_vector::convergence_compute_norm2( - this->exec, mtx.get(), result.get(), converged); + mtx->compute_norm2(result.get()); - EXPECT_EQ(result->at(0, 0, 0), result_clone->at(0, 0, 0)); + EXPECT_EQ(result->at(0, 0, 0), T_nc{3.0}); EXPECT_EQ(result->at(0, 0, 1), T_nc{5.0}); - - EXPECT_EQ(result->at(1, 0, 0), result_clone->at(1, 0, 0)); + EXPECT_EQ(result->at(1, 0, 0), T_nc{5.0}); EXPECT_EQ(result->at(1, 0, 1), T_nc{3.0}); } -TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongInputSize) -{ - using Mtx = typename TestFixture::Mtx; - auto result = - Mtx::create(this->exec, gko::batch_dim<2>(std::vector>{ - gko::dim<2>{1, 2}, gko::dim<2>{1, 3}})); - - ASSERT_THROW(this->mtx_1->compute_dot(this->mtx_2.get(), result.get()), - gko::DimensionMismatch); -} - - -TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongResultSize) -{ - using Mtx = typename TestFixture::Mtx; - auto result = - Mtx::create(this->exec, gko::batch_dim<2>(std::vector>{ - gko::dim<2>{1, 2}, gko::dim<2>{1, 2}})); - auto result2 = - Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); - - ASSERT_THROW(this->mtx_0->compute_dot(this->mtx_1.get(), result.get()), - gko::DimensionMismatch); - ASSERT_THROW(this->mtx_0->compute_dot(this->mtx_1.get(), result2.get()), - gko::DimensionMismatch); -} - - TYPED_TEST(BatchMultiVector, CopiesData) { gko::kernels::reference::batch_multi_vector::copy( @@ -676,71 +339,6 @@ TYPED_TEST(BatchMultiVector, CopiesData) } -TYPED_TEST(BatchMultiVector, ConvergenceCopyData) -{ - auto umtx_0 = this->mtx_0->unbatch(); - - const int num_rhs = 3; - const gko::uint32 converged = 0xfffffffd | (0 - (1 << num_rhs)); - gko::kernels::reference::batch_multi_vector::convergence_copy( - this->exec, this->mtx_0.get(), this->mtx_1.get(), converged); - - auto mtx_10_clone = gko::clone(this->mtx_10); - auto mtx_11_clone = gko::clone(this->mtx_11); - - auto res = this->mtx_1->unbatch(); - - EXPECT_EQ(res[0]->at(0, 0), mtx_10_clone->at(0, 0)); - EXPECT_EQ(res[0]->at(1, 0), mtx_10_clone->at(1, 0)); - 
EXPECT_EQ(res[0]->at(0, 1), this->mtx_0->at(0, 0, 1)); - EXPECT_EQ(res[0]->at(1, 1), this->mtx_0->at(0, 1, 1)); - EXPECT_EQ(res[0]->at(0, 2), mtx_10_clone->at(0, 2)); - EXPECT_EQ(res[0]->at(1, 2), mtx_10_clone->at(1, 2)); - - EXPECT_EQ(res[1]->at(0, 0), mtx_11_clone->at(0, 0)); - EXPECT_EQ(res[1]->at(1, 0), mtx_11_clone->at(1, 0)); - EXPECT_EQ(res[1]->at(0, 1), this->mtx_0->at(1, 0, 1)); - EXPECT_EQ(res[1]->at(1, 1), this->mtx_0->at(1, 1, 1)); - EXPECT_EQ(res[1]->at(0, 2), mtx_11_clone->at(0, 2)); - EXPECT_EQ(res[1]->at(1, 2), mtx_11_clone->at(1, 2)); -} - - -TYPED_TEST(BatchMultiVector, BatchScale) -{ - using T = typename TestFixture::value_type; - using Mtx = typename TestFixture::Mtx; - using BDiag = gko::matrix::BatchDiagonal; - - auto mtx(gko::batch_initialize( - {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, - {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, - this->exec)); - - auto left(gko::batch_diagonal_initialize( - I>{I{1.0, 2.0, 3.0}, I{-1.0, -2.0, -3.0}}, this->exec)); - auto rght(gko::batch_diagonal_initialize( - I>{I{-0.5, -2.0}, I{2.0, 0.25}}, this->exec)); - - gko::kernels::reference::batch_multi_vector::batch_scale( - this->exec, left.get(), rght.get(), mtx.get()); - - EXPECT_EQ(mtx->at(0, 0, 0), T{-0.5}); - EXPECT_EQ(mtx->at(0, 1, 0), T{-2.0}); - EXPECT_EQ(mtx->at(0, 2, 0), T{-3.0}); - EXPECT_EQ(mtx->at(0, 0, 1), T{0.0}); - EXPECT_EQ(mtx->at(0, 1, 1), T{-12.0}); - EXPECT_EQ(mtx->at(0, 2, 1), T{-24.0}); - - EXPECT_EQ(mtx->at(1, 0, 0), T{8.0}); - EXPECT_EQ(mtx->at(1, 1, 0), T{12.0}); - EXPECT_EQ(mtx->at(1, 2, 0), T{0.0}); - EXPECT_EQ(mtx->at(1, 0, 1), T{-0.5}); - EXPECT_EQ(mtx->at(1, 1, 1), T{1.0}); - EXPECT_EQ(mtx->at(1, 2, 1), T{-0.75}); -} - - TYPED_TEST(BatchMultiVector, ConvertsToPrecision) { using BatchMultiVector = typename TestFixture::Mtx; @@ -787,80 +385,6 @@ TYPED_TEST(BatchMultiVector, MovesToPrecision) } -TYPED_TEST(BatchMultiVector, ConvertsToCsr32) -{ - using T = typename TestFixture::value_type; - using BatchCsr = typename gko::matrix::BatchCsr; - auto batch_csr_mtx = BatchCsr::create(this->mtx_6->get_executor()); - - this->mtx_6->convert_to(batch_csr_mtx.get()); - - auto v = batch_csr_mtx->get_const_values(); - auto c = batch_csr_mtx->get_const_col_idxs(); - auto r = batch_csr_mtx->get_const_row_ptrs(); - ASSERT_EQ(batch_csr_mtx->get_num_batch_entries(), 2); - ASSERT_EQ(batch_csr_mtx->get_size().at(0), gko::dim<2>(3, 3)); - ASSERT_EQ(batch_csr_mtx->get_size().at(1), gko::dim<2>(3, 3)); - ASSERT_EQ(batch_csr_mtx->get_num_stored_elements(), 10); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 2); - EXPECT_EQ(r[2], 3); - EXPECT_EQ(r[3], 5); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 2); - EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(c[4], 2); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{3.0}); - EXPECT_EQ(v[3], T{1.0}); - EXPECT_EQ(v[4], T{5.0}); - EXPECT_EQ(v[5], T{2.0}); - EXPECT_EQ(v[6], T{5.0}); - EXPECT_EQ(v[7], T{1.0}); - EXPECT_EQ(v[8], T{-1.0}); - EXPECT_EQ(v[9], T{8.0}); -} - - -TYPED_TEST(BatchMultiVector, MovesToCsr32) -{ - using T = typename TestFixture::value_type; - using BatchCsr = typename gko::matrix::BatchCsr; - auto batch_csr_mtx = BatchCsr::create(this->mtx_6->get_executor()); - - this->mtx_6->move_to(batch_csr_mtx.get()); - - auto v = batch_csr_mtx->get_const_values(); - auto c = batch_csr_mtx->get_const_col_idxs(); - auto r = batch_csr_mtx->get_const_row_ptrs(); - ASSERT_EQ(batch_csr_mtx->get_num_batch_entries(), 2); - ASSERT_EQ(batch_csr_mtx->get_size().at(0), gko::dim<2>(3, 3)); - 
ASSERT_EQ(batch_csr_mtx->get_size().at(1), gko::dim<2>(3, 3)); - ASSERT_EQ(batch_csr_mtx->get_num_stored_elements(), 10); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 2); - EXPECT_EQ(r[2], 3); - EXPECT_EQ(r[3], 5); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 2); - EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(c[4], 2); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{3.0}); - EXPECT_EQ(v[3], T{1.0}); - EXPECT_EQ(v[4], T{5.0}); - EXPECT_EQ(v[5], T{2.0}); - EXPECT_EQ(v[6], T{5.0}); - EXPECT_EQ(v[7], T{1.0}); - EXPECT_EQ(v[8], T{-1.0}); - EXPECT_EQ(v[9], T{8.0}); -} - - TYPED_TEST(BatchMultiVector, ConvertsEmptyToPrecision) { using BatchMultiVector = typename TestFixture::Mtx; @@ -891,133 +415,4 @@ TYPED_TEST(BatchMultiVector, MovesEmptyToPrecision) } -TYPED_TEST(BatchMultiVector, ConvertsEmptyMatrixToCsr) -{ - using BatchMultiVector = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using BatchCsr = typename gko::matrix::BatchCsr; - auto empty = BatchMultiVector::create(this->exec); - auto res = BatchCsr::create(this->exec); - - empty->convert_to(res.get()); - - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_EQ(*res->get_const_row_ptrs(), 0); - ASSERT_FALSE(res->get_num_batch_entries()); -} - - -TYPED_TEST(BatchMultiVector, MovesEmptyMatrixToCsr) -{ - using BatchMultiVector = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using BatchCsr = typename gko::matrix::BatchCsr; - auto empty = BatchMultiVector::create(this->exec); - auto res = BatchCsr::create(this->exec); - - empty->move_to(res.get()); - - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_EQ(*res->get_const_row_ptrs(), 0); - ASSERT_FALSE(res->get_num_batch_entries()); -} - - -TYPED_TEST(BatchMultiVector, ConvertsToBatchDiagonal) -{ - using BDense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using BDiag = gko::matrix::BatchDiagonal; - auto vec = gko::batch_initialize( - {I({2.0, 3.0, -1.0}), I({1.0, -2.0, 8.0})}, this->exec); - auto diag = BDiag::create(this->exec); - - vec->convert_to(diag.get()); - - auto check_sz = gko::batch_dim<2>{2, gko::dim<2>{3}}; - ASSERT_EQ(diag->get_size(), check_sz); - auto diag_vals = diag->get_const_values(); - ASSERT_EQ(diag_vals[0], T{2.0}); - ASSERT_EQ(diag_vals[1], T{3.0}); - ASSERT_EQ(diag_vals[2], T{-1.0}); - ASSERT_EQ(diag_vals[3], T{1.0}); - ASSERT_EQ(diag_vals[4], T{-2.0}); - ASSERT_EQ(diag_vals[5], T{8.0}); -} - - -TYPED_TEST(BatchMultiVector, MovesToBatchDiagonal) -{ - using BDense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using BDiag = gko::matrix::BatchDiagonal; - auto vec = gko::batch_initialize( - {I({2.0, 3.0, -1.0}), I({1.0, -2.0, 8.0})}, this->exec); - auto vec_ptr = vec->get_const_values(); - auto diag = BDiag::create(this->exec); - - vec->move_to(diag.get()); - - auto check_sz = gko::batch_dim<2>{2, gko::dim<2>{3}}; - ASSERT_EQ(diag->get_size(), check_sz); - auto diag_vals = diag->get_const_values(); - ASSERT_EQ(diag_vals, vec_ptr); - ASSERT_NE(diag_vals, vec->get_const_values()); - ASSERT_EQ(vec->get_num_batch_entries(), 0); -} - - -TYPED_TEST(BatchMultiVector, SquareMatrixIsTransposable) -{ - using Mtx = typename TestFixture::Mtx; - auto trans = this->mtx_4->transpose(); - auto trans_as_batch_multi_vector = static_cast(trans.get()); - - auto utb = trans_as_batch_multi_vector->unbatch(); - GKO_ASSERT_MTX_NEAR(utb[0].get(), - l({{1.0, 6.0, 6.0}, {1.5, 1.0, 1.0}, {3.0, 5.0, 5.5}}), - r::value); - GKO_ASSERT_MTX_NEAR( - 
utb[1].get(), l({{2.0, 4.0, -1.25}, {-2.0, 3.0, 3.0}, {1.5, 2.2, 0.5}}), - r::value); -} - - -TYPED_TEST(BatchMultiVector, NonSquareMatrixIsTransposable) -{ - using Mtx = typename TestFixture::Mtx; - auto trans = this->mtx_5->transpose(); - auto trans_as_batch_multi_vector = static_cast(trans.get()); - - auto utb = trans_as_batch_multi_vector->unbatch(); - GKO_ASSERT_MTX_NEAR(utb[0].get(), l({{1.0, 6.0, 7.0}, {1.5, 1.0, -4.5}}), - r::value); - GKO_ASSERT_MTX_NEAR(utb[1].get(), l({{2.0, 1.0, 4.0}, {-2.0, 3.0, 3.0}}), - r::value); -} - - -TYPED_TEST(BatchMultiVector, SquareMatrixAddScaledIdentity) -{ - using T = typename TestFixture::value_type; - using Mtx = typename TestFixture::Mtx; - auto mtx = gko::batch_initialize( - {{I({1.0, -1.0, 1.5}), I({-2.0, 0.0, 3.0}), - I({1.2, -0.5, 1.0})}, - {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}, {3.0, 0.0, -1.5}}}, - this->exec); - auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); - auto beta = gko::batch_initialize({{3.0}, {-1.0}}, this->exec); - auto sol_mtx = gko::batch_initialize( - {{I({5.0, -3.0, 4.5}), I({-6.0, 2.0, 9.0}), - I({3.6, -1.5, 5.0})}, - {{-3.0, 2.0, 0.5}, {-1.0, 0.5, -4.0}, {-3.0, 0.0, -0.5}}}, - this->exec); - - mtx->add_scaled_identity(alpha.get(), beta.get()); - - GKO_ASSERT_BATCH_MTX_NEAR(mtx, sol_mtx, r::value); -} - - } // namespace From 80cbcbd4ab207475d475783f8e5f2e8734af2bfa Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 14 Jul 2023 13:55:42 +0200 Subject: [PATCH 117/583] Core and interface updates --- core/base/batch_multi_vector.cpp | 50 ++++++++++++------- .../ginkgo/core/base/batch_multi_vector.hpp | 27 ++++++---- 2 files changed, 48 insertions(+), 29 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 3a3f0aff757..0a3612ab205 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include @@ -49,7 +50,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { -namespace matrix { namespace batch_multi_vector { @@ -67,12 +67,14 @@ template void BatchMultiVector::scale_impl( const BatchMultiVector* alpha) { - GKO_ASSERT_BATCH_EQUAL_ROWS( - alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); + GKO_ASSERT_EQ(alpha->get_num_batch_entries(), + this->get_num_batch_entries()); + GKO_ASSERT_EQUAL_ROWS(alpha->get_common_size(), dim<2>(1, 1)); for (size_type b = 0; b < alpha->get_num_batch_entries(); ++b) { if (alpha->get_common_size()[1] != 1) { // different alpha for each column - GKO_ASSERT_BATCH_EQUAL_COLS(this, alpha); + GKO_ASSERT_EQUAL_COLS(this->get_common_size(), + alpha->get_common_size()); } } this->get_executor()->run(batch_multi_vector::make_scale(alpha, this)); @@ -84,15 +86,18 @@ void BatchMultiVector::add_scaled_impl( const BatchMultiVector* alpha, const BatchMultiVector* b) { - GKO_ASSERT_BATCH_EQUAL_ROWS( - alpha, batch_dim<2>(this->get_num_batch_entries(), dim<2>(1, 1))); + GKO_ASSERT_EQ(alpha->get_num_batch_entries(), + this->get_num_batch_entries()); + GKO_ASSERT_EQUAL_ROWS(alpha->get_common_size(), dim<2>(1, 1)); for (size_type b = 0; b < alpha->get_num_batch_entries(); ++b) { if (alpha->get_common_size()[1] != 1) { // different alpha for each column - GKO_ASSERT_BATCH_EQUAL_COLS(this, alpha); + GKO_ASSERT_EQUAL_COLS(this->get_common_size(), + alpha->get_common_size()); } } - GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, b); + GKO_ASSERT_EQ(b->get_num_batch_entries(), this->get_num_batch_entries()); + GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); this->get_executor()->run( batch_multi_vector::make_add_scaled(alpha, b, this)); @@ -101,7 +106,8 @@ void BatchMultiVector::add_scaled_impl( inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) { - return batch_dim<2>(sizes.get_num_batch_entries(), dim<2>(1, sizes[1])); + return batch_dim<2>(sizes.get_num_batch_entries(), + dim<2>(1, sizes.get_common_size()[1])); } @@ -110,9 +116,13 @@ void BatchMultiVector::compute_dot_impl( const BatchMultiVector* b, BatchMultiVector* result) const { - GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(this, b); - GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(batch_result, - get_col_sizes(this->get_size())); + GKO_ASSERT_EQ(b->get_num_batch_entries(), this->get_num_batch_entries()); + GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQ(this->get_num_batch_entries(), + result->get_num_batch_entries()); + GKO_ASSERT_EQUAL_DIMENSIONS( + result->get_common_size(), + get_col_sizes(this->get_size()).get_common_size()); this->get_executor()->run( batch_multi_vector::make_compute_dot(this, b, result)); } @@ -122,7 +132,11 @@ template void BatchMultiVector::compute_norm2_impl( BatchMultiVector>* result) const { - GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(result, get_col_sizes(this->get_size())); + GKO_ASSERT_EQ(this->get_num_batch_entries(), + result->get_num_batch_entries()); + GKO_ASSERT_EQUAL_DIMENSIONS( + result->get_common_size(), + get_col_sizes(this->get_size()).get_common_size()); this->get_executor()->run(batch_multi_vector::make_compute_norm2( as>(this), result)); } @@ -152,8 +166,8 @@ inline void read_impl(MatrixType* mtx, const std::vector& data) auto batch_size = batch_dim<2>(data.size(), common_size); size_type ind = 0; for (const auto& b : data) { - b_size = b.size; - GKO_ASSERT_EQ(common_size, b_size); + auto b_size = b.size; + GKO_ASSERT_EQUAL_DIMENSIONS(common_size, b_size); } auto tmp = MatrixType::create(mtx->get_executor()->get_master(), batch_size); @@ -194,7 +208,8 @@ void 
BatchMultiVector::read(const std::vector& data) template inline void write_impl(const MatrixType* mtx, std::vector& data) { - std::unique_ptr> op{}; + std::unique_ptr> + op{}; const MatrixType* tmp{}; if (mtx->get_executor()->get_master() != mtx->get_executor()) { op = mtx->clone(mtx->get_executor()->get_master()); @@ -238,7 +253,4 @@ void BatchMultiVector::write(std::vector& data) const GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_MATRIX); -} // namespace matrix - - } // namespace gko diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 1050ec28224..d1a0c01ddb9 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -68,7 +68,8 @@ namespace gko { */ template class BatchMultiVector - : public EnableAbstractPolymorphicObject>, + : public EnablePolymorphicObject>, + public EnablePolymorphicAssignment>, public EnableCreateMethod>, public ConvertibleTo>>, public BatchReadableFromMatrixData, @@ -76,11 +77,15 @@ class BatchMultiVector public BatchWritableToMatrixData, public BatchWritableToMatrixData { friend class EnableCreateMethod; + friend class EnablePolymorphicObject; friend class BatchMultiVector>; public: using BatchReadableFromMatrixData::read; using BatchReadableFromMatrixData::read; + using EnablePolymorphicObject::EnablePolymorphicObject; + using EnablePolymorphicAssignment::convert_to; + using EnablePolymorphicAssignment::move_to; using value_type = ValueType; using index_type = int32; @@ -354,7 +359,8 @@ class BatchMultiVector * (the number of columns in the vector must match the number * of columns of this) */ - void compute_norm2(BatchMultiVector* result) const + void compute_norm2( + BatchMultiVector>* result) const { auto exec = this->get_executor(); this->compute_norm2_impl(make_temporary_clone(exec, result).get()); @@ -395,10 +401,10 @@ class BatchMultiVector BatchMultiVector& operator=(BatchMultiVector&& other) { if (this != &other) { - EnableAbstractPolymorphicObject::operator=( + EnablePolymorphicObject::operator=( std::move(other)); this->set_size(other.get_size()); - other.set_size({}); + other.set_size(batch_dim<2>{}); } return *this; } @@ -414,7 +420,7 @@ class BatchMultiVector * input, which will have size 0x0 and unchanged executor afterwards. 
*/ BatchMultiVector(BatchMultiVector&& other) - : EnableAbstractPolymorphicObject(std::move(other)), + : EnablePolymorphicObject(std::move(other)), batch_size_{std::exchange(other.batch_size_, batch_dim<2>{})} {} @@ -424,7 +430,7 @@ class BatchMultiVector { auto common_size = matrices[0]->get_size(); for (int i = 1; i < matrices.size(); ++i) { - GKO_ASSERT_EQ(common_size, matrices[i]->get_size()); + GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); } return batch_dim<2>{matrices.size(), common_size}; } @@ -450,7 +456,7 @@ class BatchMultiVector */ BatchMultiVector(std::shared_ptr exec, const batch_dim<2>& size = batch_dim<2>{}) - : EnableAbstractPolymorphicObject(exec), + : EnablePolymorphicObject(exec), batch_size_(size), values_(exec, compute_num_elems(size)) {} @@ -472,7 +478,7 @@ class BatchMultiVector template BatchMultiVector(std::shared_ptr exec, const batch_dim<2>& size, ValuesArray&& values) - : EnableAbstractPolymorphicObject(exec), + : EnablePolymorphicObject(exec), batch_size_(size), values_{exec, std::forward(values)} { @@ -489,7 +495,7 @@ class BatchMultiVector */ BatchMultiVector(std::shared_ptr exec, const std::vector*>& matrices) - : EnableAbstractPolymorphicObject(exec), + : EnablePolymorphicObject(exec), batch_size_{compute_batch_size(matrices)}, values_(exec, compute_num_elems(batch_size_)) { @@ -594,7 +600,8 @@ class BatchMultiVector * @note Other implementations of batch_multi_vector should override this * function instead of compute_norm2(BatchMultiVector *result). */ - virtual void compute_norm2_impl(BatchMultiVector* result) const; + virtual void compute_norm2_impl( + BatchMultiVector>* result) const; size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept From 3fdf20b094645c02ce987560129ad6869c9a2a2d Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 14 Jul 2023 16:18:42 +0200 Subject: [PATCH 118/583] Core test fixes --- core/test/base/batch_multi_vector.cpp | 173 ++++-------------- .../ginkgo/core/base/batch_multi_vector.hpp | 4 +- test/base/batch_multi_vector_kernels.cpp | 154 +--------------- 3 files changed, 49 insertions(+), 282 deletions(-) diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index e43be1e7b86..60f5fc071ec 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -53,7 +53,6 @@ class BatchMultiVector : public ::testing::Test { BatchMultiVector() : exec(gko::ReferenceExecutor::create()), mtx(gko::batch_initialize>( - std::vector{4, 3}, {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, exec)) @@ -64,13 +63,7 @@ class BatchMultiVector : public ::testing::Test { gko::BatchMultiVector* m) { ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_stride().at(0), 4); - ASSERT_EQ(m->get_stride().at(1), 3); - ASSERT_EQ(m->get_num_stored_elements(), (2 * 4) + (2 * 3)); - ASSERT_EQ(m->get_num_stored_elements(0), 2 * 4); - ASSERT_EQ(m->get_num_stored_elements(1), 2 * 3); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); @@ -88,7 +81,7 @@ class BatchMultiVector : public ::testing::Test { static void assert_empty(gko::BatchMultiVector* m) { ASSERT_EQ(m->get_num_batch_entries(), 0); - ASSERT_EQ(m->get_num_stored_elements(), 0); + 
ASSERT_EQ(m->get_common_size(), {}); } std::shared_ptr exec; @@ -116,30 +109,10 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedWithSize) { using size_type = gko::size_type; auto m = gko::BatchMultiVector::create( - this->exec, - std::vector>{gko::dim<2>{2, 4}, gko::dim<2>{2, 3}}); + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 4))); ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 4)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 3)); - EXPECT_EQ(m->get_stride().at(0), 4); - EXPECT_EQ(m->get_stride().at(1), 3); - ASSERT_EQ(m->get_num_stored_elements(), 14); - ASSERT_EQ(m->get_num_stored_elements(0), 8); - ASSERT_EQ(m->get_num_stored_elements(1), 6); -} - - -TYPED_TEST(BatchMultiVector, CanBeConstructedWithSizeAndStride) -{ - using size_type = gko::size_type; - auto m = gko::BatchMultiVector::create( - this->exec, std::vector>{gko::dim<2>{2, 3}}, - std::vector{4}); - - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - EXPECT_EQ(m->get_stride().at(0), 4); - ASSERT_EQ(m->get_num_stored_elements(), 8); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 4)); } @@ -156,16 +129,14 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingData) // clang-format on auto m = gko::BatchMultiVector::create( - this->exec, - std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, - gko::array::view(this->exec, 12, data), - std::vector{3, 3}); + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::view(this->exec, 4, data)); ASSERT_EQ(m->get_const_values(), data); - ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); - ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); - ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); + ASSERT_EQ(m->at(0, 0, 1), value_type{1.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{2.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); } @@ -184,14 +155,13 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingConstData) auto m = gko::BatchMultiVector::create_const( this->exec, std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, - gko::array::const_view(this->exec, 12, data), - std::vector{3, 3}); + gko::array::const_view(this->exec, 4, data)); ASSERT_EQ(m->get_const_values(), data); - ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); - ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); - ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); + ASSERT_EQ(m->at(0, 0, 1), value_type{1.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{2.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); } @@ -200,8 +170,8 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromBatchMultiVectorMatrices) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize( - 3, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); @@ -221,8 +191,8 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatricesByDuplication) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize( - 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); auto mat2 = gko::initialize({{1.0, 
2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); @@ -240,8 +210,8 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatrices) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize( - 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); @@ -257,8 +227,8 @@ TYPED_TEST(BatchMultiVector, CanBeUnbatchedIntoDenseMatrices) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize( - 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); @@ -283,9 +253,7 @@ TYPED_TEST(BatchMultiVector, CanBeListConstructed) {{1.0, 2.0}, {1.0, 3.0}}, this->exec); ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 4); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); EXPECT_EQ(m->at(0, 0), value_type{1}); EXPECT_EQ(m->at(0, 1), value_type{2}); EXPECT_EQ(m->at(1, 0), value_type{1}); @@ -293,28 +261,13 @@ TYPED_TEST(BatchMultiVector, CanBeListConstructed) } -TYPED_TEST(BatchMultiVector, CanBeListConstructedWithstride) -{ - using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( - std::vector{2}, {{1.0, 2.0}}, this->exec); - ASSERT_EQ(m->get_num_batch_entries(), 1); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 4); - EXPECT_EQ(m->at(0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1), value_type{2.0}); -} - - TYPED_TEST(BatchMultiVector, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; auto m = gko::batch_initialize>( 2, I({1.0, 2.0}), this->exec); ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 4); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); @@ -328,46 +281,10 @@ TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructed) using T = value_type; auto m = gko::batch_initialize>( {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, - {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, - this->exec); - - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); - ASSERT_EQ(m->get_stride().at(0), 3); - ASSERT_EQ(m->get_stride().at(1), 2); - EXPECT_EQ(m->get_num_stored_elements(), 15); - ASSERT_EQ(m->get_num_stored_elements(0), 9); - ASSERT_EQ(m->get_num_stored_elements(1), 6); - EXPECT_EQ(m->at(0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1), value_type{1.0}); - EXPECT_EQ(m->at(0, 2), value_type{0.0}); - ASSERT_EQ(m->at(0, 3), value_type{2.0}); - EXPECT_EQ(m->at(0, 4), value_type{4.0}); - EXPECT_EQ(m->at(1, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 1), value_type{2.0}); - EXPECT_EQ(m->at(1, 2), value_type{3.0}); - ASSERT_EQ(m->at(1, 3), value_type{4.0}); - EXPECT_EQ(m->at(1, 4), value_type{5.0}); -} - - 
-TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructedWithstride) -{ - using value_type = typename TestFixture::value_type; - using T = value_type; - auto m = gko::batch_initialize>( - {4, 3}, - {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, - {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, + {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, this->exec); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); - ASSERT_EQ(m->get_stride().at(0), 4); - ASSERT_EQ(m->get_stride().at(1), 3); - EXPECT_EQ(m->get_num_stored_elements(), 21); - ASSERT_EQ(m->get_num_stored_elements(0), 12); - ASSERT_EQ(m->get_num_stored_elements(1), 9); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 3)); EXPECT_EQ(m->at(0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 1), value_type{1.0}); EXPECT_EQ(m->at(0, 2), value_type{0.0}); @@ -375,9 +292,9 @@ TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructedWithstride) EXPECT_EQ(m->at(0, 4), value_type{4.0}); EXPECT_EQ(m->at(1, 0), value_type{1.0}); EXPECT_EQ(m->at(1, 1), value_type{2.0}); - EXPECT_EQ(m->at(1, 2), value_type{3.0}); - ASSERT_EQ(m->at(1, 3), value_type{4.0}); - EXPECT_EQ(m->at(1, 4), value_type{5.0}); + EXPECT_EQ(m->at(1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 3), value_type{3.0}); + EXPECT_EQ(m->at(1, 4), value_type{4.0}); } @@ -420,13 +337,11 @@ TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixData) using value_type = typename TestFixture::value_type; auto m = gko::BatchMultiVector::create(this->exec); // clang-format off - m->read({gko::matrix_data{{2, 3}, + m->read({gko::matrix_data{{2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, - {0, 2, 2.0}, {1, 0, 0.0}, - {1, 1, 5.0}, - {1, 2, 0.0}}}, + {1, 1, 5.0}}}, gko::matrix_data{{2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, @@ -434,17 +349,11 @@ TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixData) {1, 1, 9.0}}}}); // clang-format on - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 2)); - ASSERT_EQ(m->get_num_stored_elements(), 10); - ASSERT_EQ(m->get_num_stored_elements(0), 6); - ASSERT_EQ(m->get_num_stored_elements(1), 4); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); - EXPECT_EQ(m->at(0, 1, 2), value_type{0.0}); EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); @@ -483,31 +392,27 @@ TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixAssemblyData) { using value_type = typename TestFixture::value_type; auto m = gko::BatchMultiVector::create(this->exec); - gko::matrix_assembly_data data1(gko::dim<2>{2, 3}); + gko::matrix_assembly_data data1(gko::dim<2>{2, 2}); data1.set_value(0, 0, 1.0); data1.set_value(0, 1, 3.0); - data1.set_value(0, 2, 2.0); data1.set_value(1, 0, 0.0); data1.set_value(1, 1, 5.0); - data1.set_value(1, 2, 0.0); gko::matrix_assembly_data data2(gko::dim<2>{2, 1}); data2.set_value(0, 0, 2.0); + data2.set_value(0, 1, 1.0); data2.set_value(1, 0, 5.0); + data2.set_value(1, 1, 4.0); auto data = std::vector>{data1, data2}; m->read(data); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 8); - ASSERT_EQ(m->get_num_stored_elements(0), 6); - ASSERT_EQ(m->get_num_stored_elements(1), 2); + 
ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{0.0}); EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); EXPECT_EQ(m->at(1, 1, 0), value_type{5.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{1.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{4.0}); } diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index d1a0c01ddb9..0b8cb8b375e 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -705,14 +705,14 @@ std::unique_ptr batch_initialize( size_type num_batch_entries = vals.size(); auto vals_begin = begin(vals); size_type common_num_rows = vals_begin->size(); - size_type common_num_cols = begin(vals_begin)->size(); + size_type common_num_cols = vals_begin->begin()->size(); auto common_size = dim<2>(common_num_rows, common_num_cols); size_type ind = 0; for (const auto& b : vals) { auto num_rows = b.size(); auto num_cols = begin(b)->size(); auto b_size = dim<2>(num_rows, num_cols); - GKO_ASSERT_EQ(b_size, common_size); + GKO_ASSERT_EQUAL_DIMENSIONS(b_size, common_size); } auto b_size = batch_dim<2>(num_batch_entries, common_size); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index e16607db844..8cff141e0a0 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -49,9 +49,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "test/utils/executor.hpp" -#ifndef GKO_COMPILING_DPCPP - - class BatchMultiVector : public CommonTestFixture { protected: using vtype = double; @@ -144,28 +141,6 @@ class BatchMultiVector : public CommonTestFixture { }; -TEST_F(BatchMultiVector, SingleVectorAppyIsEquivalentToRef) -{ - set_up_apply_data(1); - - x->apply(y.get(), expected.get()); - dx->apply(dy.get(), dresult.get()); - - GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); -} - - -TEST_F(BatchMultiVector, SingleVectorAdvancedAppyIsEquivalentToRef) -{ - set_up_apply_data(1); - - x->apply(alpha.get(), y.get(), beta.get(), expected.get()); - dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - - GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); -} - - TEST_F(BatchMultiVector, SingleVectorAddScaledIsEquivalentToRef) { set_up_vector_data(1); @@ -177,17 +152,6 @@ TEST_F(BatchMultiVector, SingleVectorAddScaledIsEquivalentToRef) } -TEST_F(BatchMultiVector, SingleVectorAddScaleIsEquivalentToRef) -{ - set_up_vector_data(1); - - x->add_scale(alpha.get(), y.get(), beta.get()); - dx->add_scale(dalpha.get(), dy.get(), dbeta.get()); - - GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); -} - - TEST_F(BatchMultiVector, MultipleVectorAddScaledIsEquivalentToRef) { set_up_vector_data(20); @@ -199,18 +163,8 @@ TEST_F(BatchMultiVector, MultipleVectorAddScaledIsEquivalentToRef) } -TEST_F(BatchMultiVector, MultipleVectorAddScaleIsEquivalentToRef) -{ - set_up_vector_data(20); - - x->add_scale(alpha.get(), y.get(), beta.get()); - dx->add_scale(dalpha.get(), dy.get(), dbeta.get()); - - GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); -} - - -TEST_F(BatchMultiVector, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) +TEST_F(BatchMultiVector, + MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) { set_up_vector_data(20, true); @@ -221,17 +175,6 @@ TEST_F(BatchMultiVector, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentTo } -TEST_F(BatchMultiVector, MultipleVectorAddScaleWithDifferentScalarsIsEquivalentToRef) -{ - set_up_vector_data(20, true); - - x->add_scale(alpha.get(), y.get(), beta.get()); - dx->add_scale(dalpha.get(), dy.get(), dbeta.get()); - - GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); -} - - TEST_F(BatchMultiVector, SingleVectorScaleIsEquivalentToRef) { set_up_vector_data(1); @@ -329,9 +272,10 @@ TEST_F(BatchMultiVector, CopySingleIsEquivalentToRef) { set_up_vector_data(1); - gko::kernels::reference::batch_multi_vector::copy(this->ref, x.get(), y.get()); + gko::kernels::reference::batch_multi_vector::copy(this->ref, x.get(), + y.get()); gko::kernels::EXEC_NAMESPACE::batch_multi_vector::copy(this->exec, dx.get(), - dy.get()); + dy.get()); GKO_ASSERT_BATCH_MTX_NEAR(dy, y, 0.0); } @@ -341,92 +285,10 @@ TEST_F(BatchMultiVector, CopyIsEquivalentToRef) { set_up_vector_data(20); - gko::kernels::reference::batch_multi_vector::copy(this->ref, x.get(), y.get()); + gko::kernels::reference::batch_multi_vector::copy(this->ref, x.get(), + y.get()); gko::kernels::EXEC_NAMESPACE::batch_multi_vector::copy(this->exec, dx.get(), - dy.get()); + dy.get()); GKO_ASSERT_BATCH_MTX_NEAR(dy, y, 0.0); } - - -TEST_F(BatchMultiVector, BatchScaleIsEquivalentToRef) -{ - using BDiag = gko::matrix::BatchDiagonal; - const int num_rhs = 20; - set_up_vector_data(num_rhs); - - const int num_rows_in_mat = x->get_size().at(0)[0]; - const auto left = - gen_mtx(batch_size, num_rows_in_mat, num_rows_in_mat); - const auto rght = gen_mtx(batch_size, num_rhs, num_rhs); - auto dleft = BDiag::create(this->exec); - dleft->copy_from(left.get()); - auto drght = 
BDiag::create(this->exec); - drght->copy_from(rght.get()); - - gko::kernels::reference::batch_multi_vector::batch_scale(this->ref, left.get(), - rght.get(), x.get()); - gko::kernels::EXEC_NAMESPACE::batch_multi_vector::batch_scale( - this->exec, dleft.get(), drght.get(), dx.get()); - - GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); -} - - -TEST_F(BatchMultiVector, TransposeIsEquivalentToRef) -{ - const int nrows = 11; - const int ncols = 6; - const size_t nbatch = 5; - const auto orig = gen_mtx(nbatch, nrows, ncols); - auto corig = Mtx::create(exec); - corig->copy_from(orig.get()); - - auto trans = orig->transpose(); - auto ctrans = corig->transpose(); - - auto dtrans = static_cast(trans.get()); - auto dctrans = static_cast(ctrans.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dtrans, dctrans, 0.0); -} - - -TEST_F(BatchMultiVector, ConjugateTransposeIsEquivalentToRef) -{ - const int nrows = 11; - const int ncols = 6; - const size_t nbatch = 5; - const auto orig = gen_mtx(nbatch, nrows, ncols); - auto corig = Mtx::create(exec); - corig->copy_from(orig.get()); - - auto trans = orig->conj_transpose(); - auto ctrans = corig->conj_transpose(); - - auto dtrans = static_cast(trans.get()); - auto dctrans = static_cast(ctrans.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dtrans, dctrans, 0.0); -} - - -TEST_F(BatchMultiVector, AddScaledIdentityNonSquareIsEquivalentToReference) -{ - set_up_apply_data(); - const gko::size_type batchsize = 10; - const gko::size_type num_rows = 62; - const gko::size_type num_cols = 51; - auto rmtx = gko::test::generate_uniform_batch_random_matrix( - batchsize, num_rows, num_cols, - std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(-1.0, 1.0), rand_engine, true, ref); - auto dmtx = Mtx::create(exec); - dmtx->copy_from(rmtx.get()); - - rmtx->add_scaled_identity(alpha.get(), beta.get()); - dmtx->add_scaled_identity(dalpha.get(), dbeta.get()); - - GKO_ASSERT_BATCH_MTX_NEAR(rmtx, dmtx, 1e-15) -} - - -#endif From f99b1f3b92654b8f1e46d3a5ac08cc889832b144 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 19 Jul 2023 17:22:25 +0200 Subject: [PATCH 119/583] Test fixes and dpcpp additions --- core/test/base/CMakeLists.txt | 1 + core/test/base/batch_dim.cpp | 10 + core/test/base/batch_multi_vector.cpp | 134 ++++++------ core/test/utils/assertions.hpp | 201 ++++++++++++++++++ core/test/utils/batch_helpers.hpp | 144 +++++++++++++ dpcpp/base/batch_struct.hpp | 114 ++++++++++ .../ginkgo/core/base/batch_multi_vector.hpp | 33 +-- test/base/batch_multi_vector_kernels.cpp | 2 +- 8 files changed, 562 insertions(+), 77 deletions(-) create mode 100644 core/test/utils/batch_helpers.hpp create mode 100644 dpcpp/base/batch_struct.hpp diff --git a/core/test/base/CMakeLists.txt b/core/test/base/CMakeLists.txt index f51862e8244..36bad656b07 100644 --- a/core/test/base/CMakeLists.txt +++ b/core/test/base/CMakeLists.txt @@ -1,6 +1,7 @@ ginkgo_create_test(abstract_factory) ginkgo_create_test(allocator) ginkgo_create_test(array) +ginkgo_create_test(batch_dim) ginkgo_create_test(batch_multi_vector) ginkgo_create_test(dense_cache) ginkgo_create_test(combination) diff --git a/core/test/base/batch_dim.cpp b/core/test/base/batch_dim.cpp index f4361195d7c..40743656ca3 100644 --- a/core/test/base/batch_dim.cpp +++ b/core/test/base/batch_dim.cpp @@ -85,6 +85,16 @@ TEST(BatchDim, NotEqualWorks) } +TEST(BatchDim, CanGetCumulativeOffsets) +{ + auto d = gko::batch_dim<2>(3, gko::dim<2>(4, 2)); + + ASSERT_EQ(d.get_cumulative_offset(0), 0); + ASSERT_EQ(d.get_cumulative_offset(1), 8); + 
ASSERT_EQ(d.get_cumulative_offset(2), 16); +} + + TEST(BatchDim, TransposesBatchDimensions) { ASSERT_EQ(gko::transpose(gko::batch_dim<2>(2, gko::dim<2>{4, 2})), diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 60f5fc071ec..225c6b799ac 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -62,6 +62,7 @@ class BatchMultiVector : public ::testing::Test { static void assert_equal_to_original_mtx( gko::BatchMultiVector* m) { + EXPECT_EQ(m->get_values()[0], value_type{-1.0}); ASSERT_EQ(m->get_num_batch_entries(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); @@ -81,7 +82,7 @@ class BatchMultiVector : public ::testing::Test { static void assert_empty(gko::BatchMultiVector* m) { ASSERT_EQ(m->get_num_batch_entries(), 0); - ASSERT_EQ(m->get_common_size(), {}); + ASSERT_EQ(m->get_common_size(), gko::dim<2>{}); } std::shared_ptr exec; @@ -105,6 +106,46 @@ TYPED_TEST(BatchMultiVector, ReturnsNullValuesArrayWhenEmpty) } +TYPED_TEST(BatchMultiVector, KnowsItsSizeAndValues) +{ + this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(BatchMultiVector, CanBeCopied) +{ + auto mtx_copy = gko::BatchMultiVector::create(this->exec); + mtx_copy->copy_from(this->mtx.get()); + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->at(0, 0, 0) = 7; + this->mtx->at(0, 1) = 7; + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(BatchMultiVector, CanBeMoved) +{ + auto mtx_copy = gko::BatchMultiVector::create(this->exec); + this->mtx->move_to(mtx_copy.get()); + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(BatchMultiVector, CanBeCloned) +{ + auto mtx_clone = this->mtx->clone(); + this->assert_equal_to_original_mtx( + dynamic_castmtx.get())>(mtx_clone.get())); +} + + +TYPED_TEST(BatchMultiVector, CanBeCleared) +{ + this->mtx->clear(); + this->assert_empty(this->mtx.get()); +} + + TYPED_TEST(BatchMultiVector, CanBeConstructedWithSize) { using size_type = gko::size_type; @@ -130,13 +171,17 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingData) auto m = gko::BatchMultiVector::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), - gko::array::view(this->exec, 4, data)); + gko::array::view(this->exec, 8, data)); ASSERT_EQ(m->get_const_values(), data); - ASSERT_EQ(m->at(0, 0, 1), value_type{1.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{2.0}); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); - ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); } @@ -153,15 +198,18 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingConstData) // clang-format on auto m = gko::BatchMultiVector::create_const( - this->exec, - std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, - gko::array::const_view(this->exec, 4, data)); + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::const_view(this->exec, 8, data)); ASSERT_EQ(m->get_const_values(), data); - ASSERT_EQ(m->at(0, 0, 1), value_type{1.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{2.0}); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + 
ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); - ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); } @@ -222,30 +270,6 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatrices) } -TYPED_TEST(BatchMultiVector, CanBeUnbatchedIntoDenseMatrices) -{ - using value_type = typename TestFixture::value_type; - using DenseMtx = typename TestFixture::DenseMtx; - using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, - this->exec); - auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, - this->exec); - - auto dense_mats = this->mtx->unbatch(); - - - GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); - GKO_ASSERT_MTX_NEAR(dense_mats[1].get(), mat2.get(), 0.); -} - - -TYPED_TEST(BatchMultiVector, KnowsItsSizeAndValues) -{ - this->assert_equal_to_original_mtx(this->mtx.get()); -} - - TYPED_TEST(BatchMultiVector, CanBeListConstructed) { using value_type = typename TestFixture::value_type; @@ -266,6 +290,7 @@ TYPED_TEST(BatchMultiVector, CanBeListConstructedByCopies) using value_type = typename TestFixture::value_type; auto m = gko::batch_initialize>( 2, I({1.0, 2.0}), this->exec); + ASSERT_EQ(m->get_num_batch_entries(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); @@ -298,37 +323,20 @@ TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructed) } -TYPED_TEST(BatchMultiVector, CanBeCopied) -{ - auto mtx_copy = gko::BatchMultiVector::create(this->exec); - mtx_copy->copy_from(this->mtx.get()); - this->assert_equal_to_original_mtx(this->mtx.get()); - this->mtx->at(0, 0, 0) = 7; - this->mtx->at(0, 1) = 7; - this->assert_equal_to_original_mtx(mtx_copy.get()); -} - - -TYPED_TEST(BatchMultiVector, CanBeMoved) -{ - auto mtx_copy = gko::BatchMultiVector::create(this->exec); - mtx_copy->copy_from(std::move(this->mtx)); - this->assert_equal_to_original_mtx(mtx_copy.get()); -} - - -TYPED_TEST(BatchMultiVector, CanBeCloned) +TYPED_TEST(BatchMultiVector, CanBeUnbatchedIntoDenseMatrices) { - auto mtx_clone = this->mtx->clone(); - this->assert_equal_to_original_mtx( - dynamic_castmtx.get())>(mtx_clone.get())); -} + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + auto dense_mats = this->mtx->unbatch(); -TYPED_TEST(BatchMultiVector, CanBeCleared) -{ - this->mtx->clear(); - this->assert_empty(this->mtx.get()); + GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); + GKO_ASSERT_MTX_NEAR(dense_mats[1].get(), mat2.get(), 0.); } diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index e0ec27b8624..8e825a32d4f 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -49,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include #include @@ -315,6 +316,89 @@ double get_relative_error(const MatrixData1& first, const MatrixData2& second) } +template +::testing::AssertionResult batch_matrices_near_impl( + const std::string& first_expression, const std::string& second_expression, + const std::string& tolerance_expression, const MatrixData1& first, + const MatrixData2& second, double tolerance) +{ + std::vector err; + std::vector err_flag; + for (size_type b = 0; b < first.size(); ++b) { + auto num_rows = first[b].size[0]; + auto num_cols = first[b].size[1]; + if (num_rows != second[b].size[0] || num_cols != second[b].size[1]) { + return ::testing::AssertionFailure() + << "Expected matrices of equal size\n\t" << first_expression + << " is of size [" << num_rows << " x " << num_cols + << "]\n\t" << second_expression << " is of size [" + << second[b].size[0] << " x " << second[b].size[1] << "]" + << " for batch " << b; + } + + err.push_back(detail::get_relative_error(first[b], second[b])); + err_flag.push_back(err.back() <= tolerance); + } + + auto bat = std::find_if(err.begin(), err.end(), + [&](double& e) { return !(e <= tolerance); }); + if (bat == err.end()) { + return ::testing::AssertionSuccess(); + } else { + const auto b_pos = static_cast(bat - err.begin()); + auto num_rows = first[b_pos].size[0]; + auto num_cols = first[b_pos].size[1]; + auto fail = ::testing::AssertionFailure(); + fail << "Error for batch: " << b_pos << "\n Relative error between " + << first_expression << " and " << second_expression << " is " + << err[b_pos] << "\n" + << "\twhich is larger than " << tolerance_expression + << " (which is " << tolerance << ")\n"; + if (num_rows * num_cols <= 1000) { + fail << first_expression << " is:\n"; + detail::print_matrix(fail, first[b_pos]); + fail << second_expression << " is:\n"; + detail::print_matrix(fail, second[b_pos]); + fail << "component-wise relative error is:\n"; + detail::print_componentwise_error(fail, first[b_pos], + second[b_pos]); + } else { + // build output filenames + auto test_case_info = + ::testing::UnitTest::GetInstance()->current_test_info(); + auto testname = + test_case_info ? std::string{test_case_info->test_case_name()} + + "." + test_case_info->name() + : std::string{"null"}; + auto firstfile = testname + "." + first_expression + ".mtx"; + auto secondfile = testname + "." + second_expression + ".mtx"; + auto to_remove = [](char c) { + return !std::isalnum(c) && c != '_' && c != '.' && c != '-' && + c != '<' && c != '>'; + }; + // remove all but alphanumerical and _.-<> characters from + // expressions + firstfile.erase( + std::remove_if(firstfile.begin(), firstfile.end(), to_remove), + firstfile.end()); + secondfile.erase( + std::remove_if(secondfile.begin(), secondfile.end(), to_remove), + secondfile.end()); + // save matrices + std::ofstream first_stream{firstfile}; + gko::write_raw(first_stream, first[b_pos], + gko::layout_type::coordinate); + std::ofstream second_stream{secondfile}; + gko::write_raw(second_stream, second[b_pos], + gko::layout_type::coordinate); + fail << first_expression << " saved as " << firstfile << "\n"; + fail << second_expression << " saved as " << secondfile << "\n"; + } + return fail; + } +} + + template ::testing::AssertionResult matrices_near_impl( const std::string& first_expression, const std::string& second_expression, @@ -600,6 +684,85 @@ ::testing::AssertionResult values_near, std::complex>( } +/** + * This is a gtest predicate which checks if two batch matrices are relatively + * near. 
+ * + * More formally, it checks whether the following equation holds for each of the + * matrices in the batch: + * + * ``` + * ||first - second|| <= tolerance * max(||first||, ||second||) + * ``` + * + * This function should not be called directly, but used in conjunction with + * `ASSERT_PRED_FORMAT3` as follows: + * + * ``` + * // Check if first and second are near + * ASSERT_PRED_FORMAT3(gko::test::assertions::batch_matrices_near, + * first, second, tolerance); + * // Check if first and second are far + * ASSERT_PRED_FORMAT3(!gko::test::assertions::batch_matrices_near, + * first, second, tolerance); + * ``` + * + * @see GKO_ASSERT_BATCH_MTX_NEAR + * @see GKO_EXPECT_BATCH_MTX_NEAR + */ +template +::testing::AssertionResult batch_matrices_near( + const std::string& first_expression, const std::string& second_expression, + const std::string& tolerance_expression, const Mat1* first, + const Mat2* second, double tolerance) +{ + auto exec = first->get_executor()->get_master(); + std::vector< + matrix_data> + first_data; + std::vector< + matrix_data> + second_data; + + first->write(first_data); + second->write(second_data); + + if (first_data.size() != second_data.size()) { + return ::testing::AssertionFailure() + << "Expected same batch sizes for " << first_expression + << " and " << second_expression << ", but got batch size " + << first_data.size() << " for " << first_expression + << " and batch size " << second_data.size() << " for " + << second_expression; + } + + for (size_type b = 0; b < first_data.size(); ++b) { + first_data[b].ensure_row_major_order(); + second_data[b].ensure_row_major_order(); + } + + return detail::batch_matrices_near_impl( + detail::remove_pointer_wrapper(first_expression), + detail::remove_pointer_wrapper(second_expression), tolerance_expression, + first_data, second_data, tolerance); +} + + +template +::testing::AssertionResult batch_matrices_near( + const std::string& first_expression, const std::string& second_expression, + const std::string& tolerance_expression, const Mat1* first, + std::initializer_list> second, double tolerance) +{ + auto second_mtx = + batch_initialize>>( + second, first->get_executor()->get_master()); + return batch_matrices_near( + first_expression, detail::remove_list_wrapper(second_expression), + tolerance_expression, first, second_mtx.get(), tolerance); +} + + /** * This is a gtest predicate which checks if two matrices are relatively near. * @@ -940,6 +1103,44 @@ T* plain_ptr(T* ptr) } +/** + * Checks if two batched matrices are near each other. + * + * More formally, it checks whether the following equation holds: + * + * ``` + * ||_mtx1 - _mtx2|| <= _tol * max(||_mtx1||, ||_mtx2||) + * ``` + * for all batches + * + * Has to be called from within a google test unit test. + * Internally calls gko::test::assertions::batch_matrices_near(). 
+ * + * @param _mtx1 first matrix + * @param _mtx2 second matrix + * @param _tol tolerance level + */ +#define GKO_ASSERT_BATCH_MTX_NEAR(_mtx1, _mtx2, _tol) \ + { \ + using ::gko::test::assertions::detail::l; \ + using ::gko::test::assertions::detail::plain_ptr; \ + ASSERT_PRED_FORMAT3(::gko::test::assertions::batch_matrices_near, \ + plain_ptr(_mtx1), plain_ptr(_mtx2), _tol); \ + } + + +/** + * @copydoc GKO_ASSERT_MTX_NEAR + */ +#define GKO_EXPECT_BATCH_MTX_NEAR(_mtx1, _mtx2, _tol) \ + { \ + using ::gko::test::assertions::detail::l; \ + using ::gko::test::assertions::detail::plain_ptr; \ + EXPECT_PRED_FORMAT3(::gko::test::assertions::batch_matrices_near, \ + plain_ptr(_mtx1), plain_ptr(_mtx2), _tol); \ + } + + /** * Checks if two matrices are near each other. * diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp new file mode 100644 index 00000000000..c00cfbeee50 --- /dev/null +++ b/core/test/utils/batch_helpers.hpp @@ -0,0 +1,144 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_TEST_UTILS_BATCH_HELPERS_HPP_ +#define GKO_CORE_TEST_UTILS_BATCH_HELPERS_HPP_ + + +#include + + +#include +#include + + +#include "core/test/utils/assertions.hpp" + + +namespace gko { +namespace test { + + +/** + * Converts a vector of unique pointers to a vector of shared pointers. + */ +template +std::vector> share(std::vector>&& objs) +{ + std::vector> out; + out.reserve(objs.size()); + for (auto& obj : objs) { + out.push_back(std::move(obj)); + } + return out; +} + + +/** + * Generates a batch of random matrices of the specified type. + */ +template +std::unique_ptr generate_uniform_batch_random_matrix( + const size_type batch_size, const size_type num_rows, + const size_type num_cols, NonzeroDistribution&& nonzero_dist, + ValueDistribution&& value_dist, Engine&& engine, + const bool with_all_diagonals, std::shared_ptr exec, + MatrixArgs&&... 
args) +{ + using value_type = typename MatrixType::value_type; + using index_type = typename MatrixType::index_type; + + // generate sparsity pattern + matrix_data sdata{gko::dim<2>{num_rows, num_cols}, + {}}; + + for (size_type row = 0; row < num_rows; ++row) { + // randomly generate number of nonzeros in this row + std::vector col_idx(num_cols); + std::iota(begin(col_idx), end(col_idx), size_type(0)); + const auto nnz_row = static_cast(nonzero_dist(engine)); + size_type nnz_in_row = + std::max(size_type(0), std::min(nnz_row, num_cols)); + std::shuffle(std::begin(col_idx), std::end(col_idx), engine); + + if (with_all_diagonals) { + if (nnz_in_row == 0) { + nnz_in_row = 1; + } + bool has_diagonal = false; + for (size_type icol = 0; icol < nnz_in_row; icol++) { + if (col_idx[icol] == row) { + has_diagonal = true; + } + } + if (!has_diagonal) { + col_idx[0] = row; + } + } + + std::for_each( + std::begin(col_idx), std::begin(col_idx) + nnz_in_row, + [&](size_type col) { sdata.nonzeros.emplace_back(row, col, 1.0); }); + } + + std::vector> batchmtx; + batchmtx.reserve(batch_size); + + for (size_t ibatch = 0; ibatch < batch_size; ibatch++) { + matrix_data data = sdata; + for (size_type iz = 0; iz < data.nonzeros.size(); ++iz) { + value_type valnz = + gko::detail::get_rand_value(value_dist, engine); + if (data.nonzeros[iz].column == data.nonzeros[iz].row && + valnz == zero()) { + valnz = 1.0; + } + data.nonzeros[iz].value = valnz; + } + + data.ensure_row_major_order(); + batchmtx.emplace_back(std::move(data)); + } + + // convert to the correct matrix type + auto result = MatrixType::create(exec, std::forward(args)...); + result->read(batchmtx); + return result; +} + + +} // namespace test +} // namespace gko + + +#endif // GKO_CORE_TEST_UTILS_BATCH_HELPERS_HPP_ diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp new file mode 100644 index 00000000000..86534768c2b --- /dev/null +++ b/dpcpp/base/batch_struct.hpp @@ -0,0 +1,114 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_BASE_BATCH_STRUCT_HPP_ +#define GKO_DPCPP_BASE_BATCH_STRUCT_HPP_ + + +#include "core/base/batch_struct.hpp" + + +#include +#include + + +#include "dpcpp/base/config.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the requried DPCPP scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices. + */ +template +inline gko::batch_multi_vector::UniformBatch get_batch_struct( + const BatchMultiVector* const op) +{ + return {op->get_const_values(), op->get_num_batch_entries(), + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of dense matrices. + */ +template +inline gko::batch_multi_vector::UniformBatch get_batch_struct( + BatchMultiVector* const op) +{ + return {op->get_values(), op->get_num_batch_entries(), + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates an immutable uniform batch struct from a batch of dense matrices + * that may be null. 
+ */ +template +inline gko::batch_multi_vector::UniformBatch +maybe_null_batch_struct(const BatchMultiVector* const op) +{ + if (op) { + return {op->get_const_values(), op->get_num_batch_entries(), + op->get_common_size()[1], + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; + } else { + return {nullptr, 0, 0, 0, 0}; + } +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 0b8cb8b375e..3e2c90653dc 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -138,8 +138,7 @@ class BatchMultiVector auto exec = this->get_executor(); auto unbatch_mats = std::vector>{}; for (size_type b = 0; b < this->get_num_batch_entries(); ++b) { - auto mat = unbatch_type::create(exec, this->get_common_size(), - this->get_common_size()[1]); + auto mat = unbatch_type::create(exec, this->get_common_size()); exec->copy_from(exec.get(), mat->get_num_stored_elements(), this->get_const_values() + this->get_size().get_cumulative_offset(b), @@ -484,7 +483,7 @@ class BatchMultiVector { // Ensure that the values array has the correct size auto num_elems = compute_num_elems(size); - GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems()); + GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); } /** @@ -669,7 +668,7 @@ std::unique_ptr batch_initialize( ++batch; } auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx.get()); + tmp->move_to(mtx); return mtx; } @@ -703,17 +702,25 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = vals.size(); - auto vals_begin = begin(vals); - size_type common_num_rows = vals_begin->size(); - size_type common_num_cols = vals_begin->begin()->size(); - auto common_size = dim<2>(common_num_rows, common_num_cols); size_type ind = 0; + size_type num_rows = 0; + size_type num_cols = 0; + gko::dim<2> common_size{}; + size_type idx = 0; for (const auto& b : vals) { - auto num_rows = b.size(); - auto num_cols = begin(b)->size(); + num_rows = b.size(); + num_cols = begin(b)->size(); + if (idx == 0) { + common_size = dim<2>(num_rows, num_cols); + } auto b_size = dim<2>(num_rows, num_cols); GKO_ASSERT_EQUAL_DIMENSIONS(b_size, common_size); + ++idx; } + + size_type common_num_rows = num_rows; + size_type common_num_cols = num_cols; + common_size = dim<2>(common_num_rows, common_num_cols); auto b_size = batch_dim<2>(num_batch_entries, common_size); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); size_type batch = 0; @@ -730,7 +737,7 @@ std::unique_ptr batch_initialize( ++batch; } auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx.get()); + tmp->move_to(mtx); return mtx; } @@ -777,7 +784,7 @@ std::unique_ptr batch_initialize( } } auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx.get()); + tmp->move_to(mtx); return mtx; } @@ -828,7 +835,7 @@ std::unique_ptr batch_initialize( } } auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx.get()); + tmp->move_to(mtx); return mtx; } diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index 8cff141e0a0..b2f86fa0383 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -45,7 +45,7 @@ OF THIS 
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/test/utils.hpp" -#include "core/test/utils/batch.hpp" +#include "core/test/utils/batch_helpers.hpp" #include "test/utils/executor.hpp" From 1a4d7ab6d2efb4c12d4d95189302df78d2d7bfb4 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 20 Jul 2023 11:01:20 +0200 Subject: [PATCH 120/583] Core move_to and copy fixes --- core/test/base/batch_multi_vector.cpp | 6 ++- .../ginkgo/core/base/batch_multi_vector.hpp | 50 +++---------------- test/base/CMakeLists.txt | 2 +- 3 files changed, 11 insertions(+), 47 deletions(-) diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 225c6b799ac..9d0c15a2b0d 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -60,9 +60,10 @@ class BatchMultiVector : public ::testing::Test { static void assert_equal_to_original_mtx( - gko::BatchMultiVector* m) + const gko::BatchMultiVector* m) { - EXPECT_EQ(m->get_values()[0], value_type{-1.0}); + ASSERT_NE(m->get_const_values(), nullptr); + EXPECT_EQ(m->get_const_values()[0], value_type{-1.0}); ASSERT_EQ(m->get_num_batch_entries(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); @@ -108,6 +109,7 @@ TYPED_TEST(BatchMultiVector, ReturnsNullValuesArrayWhenEmpty) TYPED_TEST(BatchMultiVector, KnowsItsSizeAndValues) { + ASSERT_NE(this->mtx->get_const_values(), nullptr); this->assert_equal_to_original_mtx(this->mtx.get()); } diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 3e2c90653dc..567fcb25662 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -57,13 +58,10 @@ namespace gko { * of the vector in each of the batches. * * The values in each of the batches are stored in row-major format (values - * belonging to the same row appear consecutive in the memory). Optionally, rows - * can be padded for better memory access. + * belonging to the same row appear consecutive in the memory). * * @tparam ValueType precision of matrix elements * - * @note While this format is not very useful for storing sparse matrices, it - * is often suitable to store vectors, and sets of vectors. * @ingroup batch_multi_vector */ template @@ -83,9 +81,11 @@ class BatchMultiVector public: using BatchReadableFromMatrixData::read; using BatchReadableFromMatrixData::read; - using EnablePolymorphicObject::EnablePolymorphicObject; using EnablePolymorphicAssignment::convert_to; using EnablePolymorphicAssignment::move_to; + using ConvertibleTo< + BatchMultiVector>>::convert_to; + using ConvertibleTo>>::move_to; using value_type = ValueType; using index_type = int32; @@ -187,7 +187,6 @@ class BatchMultiVector value_type* get_values(size_type batch) noexcept { GKO_ASSERT(batch < this->get_num_batch_entries()); - // TODO Verify return values_.get_data() + this->get_size().get_cumulative_offset(batch); } @@ -386,43 +385,6 @@ class BatchMultiVector exec, sizes, gko::detail::array_const_cast(std::move(values))}); } - /** - * Copy-assigns a BatchMultiVector. Preserves the executor and copies the - * size. - */ - BatchMultiVector& operator=(const BatchMultiVector&) = default; - - /** - * Move-assigns a BatchMultiVector. Preserves the executor and moves the - * size. 
The moved-from object has size 0x0 afterwards, but its executor is - * unchanged. - */ - BatchMultiVector& operator=(BatchMultiVector&& other) - { - if (this != &other) { - EnablePolymorphicObject::operator=( - std::move(other)); - this->set_size(other.get_size()); - other.set_size(batch_dim<2>{}); - } - return *this; - } - - /** - * Copy-constructs a BatchMultiVector. Inherits executor and size from the - * input. - */ - BatchMultiVector(const BatchMultiVector&) = default; - - /** - * Move-constructs a BatchMultiVector. Inherits executor and size from the - * input, which will have size 0x0 and unchanged executor afterwards. - */ - BatchMultiVector(BatchMultiVector&& other) - : EnablePolymorphicObject(std::move(other)), - batch_size_{std::exchange(other.batch_size_, batch_dim<2>{})} - {} - private: inline batch_dim<2> compute_batch_size( const std::vector*>& matrices) @@ -737,7 +699,7 @@ std::unique_ptr batch_initialize( ++batch; } auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); + mtx->copy_from(tmp.get()); return mtx; } diff --git a/test/base/CMakeLists.txt b/test/base/CMakeLists.txt index a80be354878..3c34a9068d4 100644 --- a/test/base/CMakeLists.txt +++ b/test/base/CMakeLists.txt @@ -1,4 +1,4 @@ -ginkgo_create_common_and_reference_test(batch_multi_vector_kernels) +# ginkgo_create_common_and_reference_test(batch_multi_vector_kernels) ginkgo_create_common_and_reference_test(device_matrix_data_kernels) ginkgo_create_common_device_test(kernel_launch_generic) ginkgo_create_common_and_reference_test(executor) From 241babacbcc9456572e617db73ddd281ad31f5ef Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 20 Jul 2023 11:23:17 +0200 Subject: [PATCH 121/583] Fix reference kernel tests --- core/test/base/batch_multi_vector.cpp | 2 +- .../ginkgo/core/base/batch_multi_vector.hpp | 22 +++----- reference/test/base/CMakeLists.txt | 1 + .../test/base/batch_multi_vector_kernels.cpp | 51 ++++--------------- 4 files changed, 19 insertions(+), 57 deletions(-) diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 9d0c15a2b0d..410ea70b4dd 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -407,7 +407,7 @@ TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixAssemblyData) data1.set_value(0, 1, 3.0); data1.set_value(1, 0, 0.0); data1.set_value(1, 1, 5.0); - gko::matrix_assembly_data data2(gko::dim<2>{2, 1}); + gko::matrix_assembly_data data2(gko::dim<2>{2, 2}); data2.set_value(0, 0, 2.0); data2.set_value(0, 1, 1.0); data2.set_value(1, 0, 5.0); diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 567fcb25662..143de27335b 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -664,25 +664,17 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = vals.size(); - size_type ind = 0; - size_type num_rows = 0; - size_type num_cols = 0; - gko::dim<2> common_size{}; - size_type idx = 0; + auto vals_begin = begin(vals); + size_type common_num_rows = vals_begin->size(); + size_type common_num_cols = vals_begin->begin()->size(); + auto common_size = dim<2>(common_num_rows, common_num_cols); for (const auto& b : vals) { - num_rows = b.size(); - num_cols = begin(b)->size(); - if (idx == 0) { - common_size = dim<2>(num_rows, num_cols); - } + auto num_rows = b.size(); + auto num_cols = begin(b)->size(); auto b_size = 
dim<2>(num_rows, num_cols); GKO_ASSERT_EQUAL_DIMENSIONS(b_size, common_size); - ++idx; } - size_type common_num_rows = num_rows; - size_type common_num_cols = num_cols; - common_size = dim<2>(common_num_rows, common_num_cols); auto b_size = batch_dim<2>(num_batch_entries, common_size); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); size_type batch = 0; @@ -699,7 +691,7 @@ std::unique_ptr batch_initialize( ++batch; } auto mtx = Matrix::create(exec, std::forward(create_args)...); - mtx->copy_from(tmp.get()); + tmp->move_to(mtx); return mtx; } diff --git a/reference/test/base/CMakeLists.txt b/reference/test/base/CMakeLists.txt index b4d922ec187..7230b329918 100644 --- a/reference/test/base/CMakeLists.txt +++ b/reference/test/base/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_test(array) +ginkgo_create_test(batch_multi_vector_kernels) ginkgo_create_test(combination) ginkgo_create_test(composition) ginkgo_create_test(index_set) diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index 8ed8f03dc25..c7ba4a0bcf2 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -41,9 +41,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include -#include #include #include #include @@ -54,9 +51,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/test/utils.hpp" -namespace { - - template class BatchMultiVector : public ::testing::Test { protected: @@ -77,26 +71,22 @@ class BatchMultiVector : public ::testing::Test { mtx_01(gko::initialize( {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), mtx_1( - gko::batch_initialize(std::vector{4, 4}, - {{{1.0, -1.0, 2.2}, {-2.0, 2.0, -0.5}}, + gko::batch_initialize({{{1.0, -1.0, 2.2}, {-2.0, 2.0, -0.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, exec)), mtx_10(gko::initialize( {I({1.0, -1.0, 2.2}), I({-2.0, 2.0, -0.5})}, exec)), - mtx_11(gko::initialize( - 4, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)), + mtx_11(gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + exec)), mtx_2(gko::batch_initialize( - std::vector{2, 2}, {{{1.0, 1.5}, {6.0, 1.0}, {-0.25, 1.0}}, {I({2.0, -2.0}), I({1.0, 3.0}), I({4.0, 3.0})}}, exec)), mtx_20(gko::initialize( - 4, {I({1.0, 1.5}), I({6.0, 1.0}), I({-0.25, 1.0})}, - exec)), + {I({1.0, 1.5}), I({6.0, 1.0}), I({-0.25, 1.0})}, exec)), mtx_21(gko::initialize( {I({2.0, -2.0}), I({1.0, 3.0}), I({4.0, 3.0})}, exec)), mtx_3(gko::batch_initialize( - std::vector{4, 4}, {{I({1.0, 1.5}), I({6.0, 1.0})}, {{2.0, -2.0}, {1.0, 3.0}}}, exec)), mtx_30(gko::initialize({I({1.0, 1.5}), I({6.0, 1.0})}, @@ -146,7 +136,6 @@ TYPED_TEST(BatchMultiVector, ScalesData) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch_initialize( - std::vector{3, 3}, {{{2.0, -2.0, 1.5}}, {{3.0, -1.0, 0.25}}}, this->exec); auto ualpha = alpha->unbatch(); @@ -238,27 +227,14 @@ TYPED_TEST(BatchMultiVector, AddsScaledWithScalar) TYPED_TEST(BatchMultiVector, AddScaledFailsOnWrongSizes) { using Mtx = typename TestFixture::Mtx; - auto alpha = - gko::batch_initialize({{2.0, 3.0, 4.0, 5.0}, {-2.0}}, this->exec); + auto alpha = gko::batch_initialize( + {{2.0, 3.0, 4.0, 5.0}, {-2.0, 2.0, 4.0, 5.0}}, this->exec); ASSERT_THROW(this->mtx_1->add_scaled(alpha.get(), this->mtx_2.get()), gko::DimensionMismatch); } -TYPED_TEST(BatchMultiVector, AddScaleFailsOnWrongScalarSizes) -{ - using Mtx = typename TestFixture::Mtx; - 
auto alpha = gko::batch_initialize( - {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto beta = gko::batch_initialize({{3.0}, {1.5}}, this->exec); - - ASSERT_THROW( - this->mtx_1->add_scale(alpha.get(), this->mtx_0.get(), beta.get()), - gko::DimensionMismatch); -} - - TYPED_TEST(BatchMultiVector, ComputesDot) { using Mtx = typename TestFixture::Mtx; @@ -282,8 +258,7 @@ TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; auto result = - Mtx::create(this->exec, gko::batch_dim<2>(std::vector>{ - gko::dim<2>{1, 2}, gko::dim<2>{1, 3}})); + Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); ASSERT_THROW(this->mtx_1->compute_dot(this->mtx_2.get(), result.get()), gko::DimensionMismatch); @@ -294,8 +269,7 @@ TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongResultSize) { using Mtx = typename TestFixture::Mtx; auto result = - Mtx::create(this->exec, gko::batch_dim<2>(std::vector>{ - gko::dim<2>{1, 2}, gko::dim<2>{1, 2}})); + Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); auto result2 = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); @@ -316,10 +290,8 @@ TYPED_TEST(BatchMultiVector, ComputesNorm2) {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, this->exec)); - auto batch_size = gko::batch_dim<2>( - std::vector>{gko::dim<2>{1, 2}, gko::dim<2>{1, 2}}); - auto result = - NormVector::create(this->exec, batch_size, gko::batch_stride(2, 2)); + auto batch_size = gko::batch_dim<2>(2, gko::dim<2>{1, 2}); + auto result = NormVector::create(this->exec, batch_size); mtx->compute_norm2(result.get()); @@ -413,6 +385,3 @@ TYPED_TEST(BatchMultiVector, MovesEmptyToPrecision) ASSERT_FALSE(res->get_num_batch_entries()); } - - -} // namespace From bc7a5edb5e65cafd6cb608f7828fc13276a70ed1 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 20 Jul 2023 11:48:15 +0200 Subject: [PATCH 122/583] Fix CUDA/HIP/DPCPP tests. 
Co-authored-by: Aditya Kashi --- core/test/utils/batch_helpers.hpp | 35 ++++++++++++------------ test/base/CMakeLists.txt | 2 +- test/base/batch_multi_vector_kernels.cpp | 20 +++++++------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp index c00cfbeee50..3b9e673922e 100644 --- a/core/test/utils/batch_helpers.hpp +++ b/core/test/utils/batch_helpers.hpp @@ -79,8 +79,8 @@ std::unique_ptr generate_uniform_batch_random_matrix( using index_type = typename MatrixType::index_type; // generate sparsity pattern - matrix_data sdata{gko::dim<2>{num_rows, num_cols}, - {}}; + matrix_data in_data{gko::dim<2>{num_rows, num_cols}, + {}}; for (size_type row = 0; row < num_rows; ++row) { // randomly generate number of nonzeros in this row @@ -106,33 +106,34 @@ std::unique_ptr generate_uniform_batch_random_matrix( } } - std::for_each( - std::begin(col_idx), std::begin(col_idx) + nnz_in_row, - [&](size_type col) { sdata.nonzeros.emplace_back(row, col, 1.0); }); + std::for_each(std::begin(col_idx), std::begin(col_idx) + nnz_in_row, + [&](size_type col) { + in_data.nonzeros.emplace_back(row, col, 1.0); + }); } - std::vector> batchmtx; - batchmtx.reserve(batch_size); + std::vector> batch_mtx; + batch_mtx.reserve(batch_size); - for (size_t ibatch = 0; ibatch < batch_size; ibatch++) { - matrix_data data = sdata; - for (size_type iz = 0; iz < data.nonzeros.size(); ++iz) { - value_type valnz = + for (int batch = 0; batch < batch_size; batch++) { + matrix_data data = in_data; + for (size_type nnz = 0; nnz < data.nonzeros.size(); ++nnz) { + value_type val = gko::detail::get_rand_value(value_dist, engine); - if (data.nonzeros[iz].column == data.nonzeros[iz].row && - valnz == zero()) { - valnz = 1.0; + if (data.nonzeros[nnz].column == data.nonzeros[nnz].row && + val == zero()) { + val = 1.0; } - data.nonzeros[iz].value = valnz; + data.nonzeros[nnz].value = val; } data.ensure_row_major_order(); - batchmtx.emplace_back(std::move(data)); + batch_mtx.emplace_back(std::move(data)); } // convert to the correct matrix type auto result = MatrixType::create(exec, std::forward(args)...); - result->read(batchmtx); + result->read(batch_mtx); return result; } diff --git a/test/base/CMakeLists.txt b/test/base/CMakeLists.txt index 3c34a9068d4..d0567f45403 100644 --- a/test/base/CMakeLists.txt +++ b/test/base/CMakeLists.txt @@ -1,4 +1,4 @@ -# ginkgo_create_common_and_reference_test(batch_multi_vector_kernels) +ginkgo_create_common_test(batch_multi_vector_kernels) ginkgo_create_common_and_reference_test(device_matrix_data_kernels) ginkgo_create_common_device_test(kernel_launch_generic) ginkgo_create_common_and_reference_test(executor) diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index b2f86fa0383..07bdf5899e9 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -59,11 +59,11 @@ class BatchMultiVector : public CommonTestFixture { BatchMultiVector() : rand_engine(15) {} template - std::unique_ptr gen_mtx(const size_t batchsize, int num_rows, + std::unique_ptr gen_mtx(const size_t batch_size, int num_rows, int num_cols) { return gko::test::generate_uniform_batch_random_matrix( - batchsize, num_rows, num_cols, + batch_size, num_rows, num_cols, std::uniform_int_distribution<>(num_cols, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, false, ref); } @@ -89,9 +89,9 @@ class BatchMultiVector : public CommonTestFixture { 
dalpha->copy_from(alpha.get()); dbeta = gko::clone(exec, beta.get()); expected = Mtx::create( - ref, gko::batch_dim<>(batch_size, gko::dim<2>{1, num_vecs})); + ref, gko::batch_dim<2>(batch_size, gko::dim<2>{1, num_vecs})); dresult = Mtx::create( - exec, gko::batch_dim<>(batch_size, gko::dim<2>{1, num_vecs})); + exec, gko::batch_dim<2>(batch_size, gko::dim<2>{1, num_vecs})); } void set_up_apply_data(const int p = 1) @@ -103,8 +103,8 @@ class BatchMultiVector : public CommonTestFixture { expected = gen_mtx(batch_size, m, p); alpha = gko::batch_initialize(batch_size, {2.0}, ref); beta = gko::batch_initialize(batch_size, {-1.0}, ref); - square = gen_mtx(batch_size, x->get_size().at()[0], - x->get_size().at()[0]); + square = gen_mtx(batch_size, x->get_common_size()[0], + x->get_common_size()[0]); dx = Mtx::create(exec); dx->copy_from(x.get()); dc_x = ComplexMtx::create(exec); @@ -212,7 +212,7 @@ TEST_F(BatchMultiVector, ComputeNorm2SingleIsEquivalentToRef) { set_up_vector_data(1); auto norm_size = - gko::batch_dim<>(batch_size, gko::dim<2>{1, x->get_size().at()[1]}); + gko::batch_dim<2>(batch_size, gko::dim<2>{1, x->get_common_size()[1]}); auto norm_expected = NormVector::create(this->ref, norm_size); auto dnorm = NormVector::create(this->exec, norm_size); @@ -227,7 +227,7 @@ TEST_F(BatchMultiVector, ComputeNorm2IsEquivalentToRef) { set_up_vector_data(20); auto norm_size = - gko::batch_dim<>(batch_size, gko::dim<2>{1, x->get_size().at()[1]}); + gko::batch_dim<2>(batch_size, gko::dim<2>{1, x->get_common_size()[1]}); auto norm_expected = NormVector::create(this->ref, norm_size); auto dnorm = NormVector::create(this->exec, norm_size); @@ -242,7 +242,7 @@ TEST_F(BatchMultiVector, ComputeDotIsEquivalentToRef) { set_up_vector_data(20); auto dot_size = - gko::batch_dim<>(batch_size, gko::dim<2>{1, x->get_size().at()[1]}); + gko::batch_dim<2>(batch_size, gko::dim<2>{1, x->get_common_size()[1]}); auto dot_expected = Mtx::create(this->ref, dot_size); auto ddot = Mtx::create(this->exec, dot_size); @@ -257,7 +257,7 @@ TEST_F(BatchMultiVector, ComputeDotSingleIsEquivalentToRef) { set_up_vector_data(1); auto dot_size = - gko::batch_dim<>(batch_size, gko::dim<2>{1, x->get_size().at()[1]}); + gko::batch_dim<2>(batch_size, gko::dim<2>{1, x->get_common_size()[1]}); auto dot_expected = Mtx::create(this->ref, dot_size); auto ddot = Mtx::create(this->exec, dot_size); From b28df4f23e12b613593ed1ec9ee4722c58ac2ba7 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 20 Jul 2023 15:07:18 +0200 Subject: [PATCH 123/583] Use ptr_param<> --- .../ginkgo/core/base/batch_multi_vector.hpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 143de27335b..47dbe6078f5 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -104,7 +104,7 @@ class BatchMultiVector * @param other The other matrix whose configuration needs to copied. */ static std::unique_ptr create_with_config_of( - const BatchMultiVector* other) + ptr_param other) { // De-referencing `other` before calling the functions (instead of // using operator `->`) is currently required to be compatible with @@ -292,7 +292,7 @@ class BatchMultiVector * of alpha (the number of columns of alpha has to match the number of * columns of the matrix). 
*/ - void scale(const BatchMultiVector* alpha) + void scale(ptr_param> alpha) { auto exec = this->get_executor(); this->scale_impl(make_temporary_clone(exec, alpha).get()); @@ -308,8 +308,8 @@ class BatchMultiVector * vector). * @param b a matrix of the same dimension as this */ - void add_scaled(const BatchMultiVector* alpha, - const BatchMultiVector* b) + void add_scaled(ptr_param> alpha, + ptr_param> b) { auto exec = this->get_executor(); this->add_scaled_impl(make_temporary_clone(exec, alpha).get(), @@ -328,9 +328,9 @@ class BatchMultiVector * @param beta Scalar(s), of the same size as alpha, to multiply this * matrix. */ - void add_scale(const BatchMultiVector* alpha, - const BatchMultiVector* a, - const BatchMultiVector* beta); + void add_scale(ptr_param> alpha, + ptr_param> a, + ptr_param> beta); /** * Computes the column-wise dot product of each matrix in this batch and its @@ -342,8 +342,8 @@ class BatchMultiVector * product (the number of column in the vector must match the number of * columns of this) */ - void compute_dot(const BatchMultiVector* b, - BatchMultiVector* result) const + void compute_dot(ptr_param> b, + ptr_param> result) const { auto exec = this->get_executor(); this->compute_dot_impl(make_temporary_clone(exec, b).get(), @@ -358,7 +358,7 @@ class BatchMultiVector * of columns of this) */ void compute_norm2( - BatchMultiVector>* result) const + ptr_param>> result) const { auto exec = this->get_executor(); this->compute_norm2_impl(make_temporary_clone(exec, result).get()); From 3384a6d10422682f17c23254852d6a13f99616d7 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Thu, 20 Jul 2023 13:53:06 +0000 Subject: [PATCH 124/583] Format files Co-authored-by: Pratik Nayak --- .../base/batch_multi_vector_kernels.hpp.inc | 34 ++++++++----------- core/base/batch_multi_vector_kernels.hpp | 6 ++-- core/test/base/batch_dim.cpp | 6 ++-- cuda/base/batch_struct.hpp | 10 +++--- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 2 +- dpcpp/base/batch_multi_vector_kernels.hpp.inc | 1 - dpcpp/base/batch_struct.hpp | 6 ++-- hip/base/batch_struct.hip.hpp | 10 +++--- include/ginkgo/ginkgo.hpp | 1 + .../base/batch_multi_vector_kernels.hpp.inc | 1 - reference/base/batch_struct.hpp | 12 +++---- test/base/batch_multi_vector_kernels.cpp | 4 +-- 12 files changed, 41 insertions(+), 52 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index 6e9dc57681a..fa6270a0b60 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -57,9 +57,9 @@ __device__ __forceinline__ void scale( template __global__ - __launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( - const gko::batch_multi_vector::UniformBatch alpha, - const gko::batch_multi_vector::UniformBatch x) +__launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( + const gko::batch_multi_vector::UniformBatch alpha, + const gko::batch_multi_vector::UniformBatch x) { for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; ibatch += gridDim.x) { @@ -93,10 +93,10 @@ __device__ __forceinline__ void add_scaled( template __global__ - __launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( - const gko::batch_multi_vector::UniformBatch alpha, - const gko::batch_multi_vector::UniformBatch x, - const gko::batch_multi_vector::UniformBatch y) +__launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( + const 
gko::batch_multi_vector::UniformBatch alpha, + const gko::batch_multi_vector::UniformBatch x, + const gko::batch_multi_vector::UniformBatch y) { for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; ibatch += gridDim.x) { @@ -243,15 +243,11 @@ __device__ __forceinline__ void compute_norm2( template -__global__ __launch_bounds__( - default_block_size, - sm_multiplier) void compute_norm2_kernel(const gko::batch_multi_vector:: - UniformBatch - x, - const gko::batch_multi_vector:: - UniformBatch< - remove_complex> - result) +__global__ +__launch_bounds__(default_block_size, sm_multiplier) void compute_norm2_kernel( + const gko::batch_multi_vector::UniformBatch x, + const gko::batch_multi_vector::UniformBatch> + result) { for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; ibatch += gridDim.x) { @@ -284,9 +280,9 @@ __device__ __forceinline__ void copy( template __global__ - __launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( - const gko::batch_multi_vector::UniformBatch src, - const gko::batch_multi_vector::UniformBatch dst) +__launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( + const gko::batch_multi_vector::UniformBatch src, + const gko::batch_multi_vector::UniformBatch dst) { for (size_type ibatch = blockIdx.x; ibatch < src.num_batch_entries; ibatch += gridDim.x) { diff --git a/core/base/batch_multi_vector_kernels.hpp b/core/base/batch_multi_vector_kernels.hpp index 7e7f9c3bb37..28c7b87de10 100644 --- a/core/base/batch_multi_vector_kernels.hpp +++ b/core/base/batch_multi_vector_kernels.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_CORE_MATRIX_BATCH_MULTI_VECTOR_KERNELS_HPP_ -#define GKO_CORE_MATRIX_BATCH_MULTI_VECTOR_KERNELS_HPP_ +#ifndef GKO_CORE_BASE_BATCH_MULTI_VECTOR_KERNELS_HPP_ +#define GKO_CORE_BASE_BATCH_MULTI_VECTOR_KERNELS_HPP_ #include @@ -101,4 +101,4 @@ GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_multi_vector, } // namespace gko -#endif // GKO_CORE_MATRIX_BATCH_MULTI_VECTOR_KERNELS_HPP_ +#endif // GKO_CORE_BASE_BATCH_MULTI_VECTOR_KERNELS_HPP_ diff --git a/core/test/base/batch_dim.cpp b/core/test/base/batch_dim.cpp index 40743656ca3..71b954264c3 100644 --- a/core/test/base/batch_dim.cpp +++ b/core/test/base/batch_dim.cpp @@ -30,13 +30,13 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include -#include +#include -#include +#include TEST(BatchDim, ConstructsCorrectUniformObject) diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 9d4eb436c16..9084cddfdfa 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -30,17 +30,15 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#ifndef GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ -#define GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ - - -#include "core/base/batch_struct.hpp" +#ifndef GKO_CUDA_BASE_BATCH_STRUCT_HPP_ +#define GKO_CUDA_BASE_BATCH_STRUCT_HPP_ #include #include +#include "core/base/batch_struct.hpp" #include "cuda/base/config.hpp" #include "cuda/base/types.hpp" @@ -111,4 +109,4 @@ maybe_null_batch_struct(const BatchMultiVector* const op) } // namespace gko -#endif // GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ +#endif // GKO_CUDA_BASE_BATCH_STRUCT_HPP_ diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 88cdb1d6e6f..64343e02fad 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -42,7 +43,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/prefix_sum_kernels.hpp" -#include "dpcpp/matrix/batch_struct.hpp" namespace gko { diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index 07d6d97ff0a..c5e2848e1d6 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - template __dpct_inline__ void scale_kernel( const gko::batch_dense::BatchEntry& alpha, diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 86534768c2b..bae0d43f6c0 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -34,13 +34,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_DPCPP_BASE_BATCH_STRUCT_HPP_ -#include "core/base/batch_struct.hpp" - - #include #include +#include "core/base/batch_struct.hpp" #include "dpcpp/base/config.hpp" @@ -111,4 +109,4 @@ maybe_null_batch_struct(const BatchMultiVector* const op) } // namespace gko -#endif // GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ +#endif // GKO_DPCPP_BASE_BATCH_STRUCT_HPP_ diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index d796cdcdb37..f76e4fa8a79 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -30,17 +30,15 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ -#define GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ - - -#include "core/base/batch_struct.hpp" +#ifndef GKO_HIP_BASE_BATCH_STRUCT_HIP_HPP_ +#define GKO_HIP_BASE_BATCH_STRUCT_HIP_HPP_ #include #include +#include "core/base/batch_struct.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/types.hip.hpp" @@ -111,4 +109,4 @@ maybe_null_batch_struct(const BatchMultiVector* const op) } // namespace gko -#endif // GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ +#endif // GKO_HIP_BASE_BATCH_STRUCT_HIP_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 8a88bf003f8..eebb31772ea 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include #include #include diff --git a/reference/base/batch_multi_vector_kernels.hpp.inc b/reference/base/batch_multi_vector_kernels.hpp.inc index a793fe030f9..a80415572c2 100644 --- a/reference/base/batch_multi_vector_kernels.hpp.inc +++ b/reference/base/batch_multi_vector_kernels.hpp.inc @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - template inline void scale_kernel( const gko::batch_multi_vector::BatchEntry& alpha, diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index 056bb575f8a..fec5b4f8803 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -30,17 +30,17 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ -#define GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ - - -#include "core/base/batch_struct.hpp" +#ifndef GKO_REFERENCE_BASE_BATCH_STRUCT_HPP_ +#define GKO_REFERENCE_BASE_BATCH_STRUCT_HPP_ #include #include +#include "core/base/batch_struct.hpp" + + namespace gko { namespace kernels { /** @@ -111,4 +111,4 @@ maybe_null_batch_struct(const BatchMultiVector* const op) } // namespace gko -#endif // GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ +#endif // GKO_REFERENCE_BASE_BATCH_STRUCT_HPP_ diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index 07bdf5899e9..fe5fa0ed85f 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/base/batch_multi_vector_kernels.hpp" +#include #include @@ -40,10 +40,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include +#include "core/base/batch_multi_vector_kernels.hpp" #include "core/test/utils.hpp" #include "core/test/utils/batch_helpers.hpp" #include "test/utils/executor.hpp" From 8f5e9c31d88dfd1669cb5eea0a5dd5622ddeac91 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 21 Jul 2023 09:46:46 +0200 Subject: [PATCH 125/583] dpcpp kernel updates Co-authored-by: Phuong Nguyen --- core/base/batch_multi_vector.cpp | 7 +- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 9 +- dpcpp/base/batch_multi_vector_kernels.hpp.inc | 115 ++++++++++-------- dpcpp/base/batch_struct.hpp | 2 +- test/base/batch_multi_vector_kernels.cpp | 29 ++--- 5 files changed, 90 insertions(+), 72 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 0a3612ab205..0c2f1e0c1ba 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -51,6 +51,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { namespace batch_multi_vector { +namespace { GKO_REGISTER_OPERATION(scale, batch_multi_vector::scale); @@ -60,6 +61,7 @@ GKO_REGISTER_OPERATION(compute_norm2, batch_multi_vector::compute_norm2); GKO_REGISTER_OPERATION(copy, batch_multi_vector::copy); +} // namespace } // namespace batch_multi_vector @@ -248,9 +250,8 @@ void BatchMultiVector::write(std::vector& data) const } -#define GKO_DECLARE_BATCH_MULTI_VECTOR_MATRIX(_type) \ - class BatchMultiVector<_type> -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_MATRIX); +#define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class BatchMultiVector<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR); } // namespace gko diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 64343e02fad..74c3b842297 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -33,16 +33,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_multi_vector_kernels.hpp" -#include -#include +#include #include +#include #include #include #include "core/components/prefix_sum_kernels.hpp" +#include "dpcpp/base/batch_struct.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/base/helper.hpp" namespace gko { diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index c5e2848e1d6..d881586e362 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -32,8 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __dpct_inline__ void scale_kernel( - const gko::batch_dense::BatchEntry& alpha, - const gko::batch_dense::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& alpha, + const gko::batch_multi_vector::BatchEntry& x, sycl::nd_item<3>& item_ct1) { const int max_li = x.num_rows * x.num_rhs; @@ -53,84 +53,95 @@ __dpct_inline__ void scale_kernel( } -/** - * Adds a scaled vector to another. - * - * @param num_rows Common length of both vectors. - * @param alpha Scaling factor. - * @param[in] x Vector to scale and add. - * @param[in,out] y Vector to add to. 
- */ template -__dpct_inline__ void add_scaled_kernel(const int num_rows, - const ValueType alpha, - const ValueType* const __restrict__ x, - ValueType* const __restrict__ y, - sycl::nd_item<3> item_ct1) +__dpct_inline__ void add_scaled_kernel( + const gko::batch_multi_vector::BatchEntry& alpha, + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& y, + sycl::nd_item<3>& item_ct1) { - for (int li = item_ct1.get_local_linear_id(); li < num_rows; - li += item_ct1.get_local_range().size()) { - y[li] += alpha * x[li]; + const int max_li = x.num_rows * x.num_rhs; + for (int li = item_ct1.get_local_id(2); li < max_li; + li += item_ct1.get_local_range(2)) { + const int row = li / x.num_rhs; + const int col = li % x.num_rhs; + + if (alpha.num_rhs == 1) { + y.values[row * y.stride + col] += + alpha.values[0] * x.values[row * x.stride + col]; + } else { + y.values[row * y.stride + col] += + alpha.values[col] * x.values[row * x.stride + col]; + } } } template __dpct_inline__ void compute_dot_product_kernel( - const int num_rows, const ValueType* const __restrict__ x, - const ValueType* const __restrict__ y, ValueType& result, - sycl::nd_item<3> item_ct1) + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry& y, + const gko::batch_multi_vector::BatchEntry& result, + sycl::nd_item<3>& item_ct1) { - const auto group = item_ct1.get_group(); - const auto group_size = item_ct1.get_local_range().size(); - const auto tid = item_ct1.get_local_linear_id(); + const auto sg = item_ct1.get_sub_group(); + const int sg_id = sg.get_group_id(); + const int sg_size = sg.get_local_range().size(); + const int num_sg = sg.get_group_range().size(); + + for (int rhs_index = sg_id; rhs_index < x.num_rhs; rhs_index += num_sg) { + ValueType val = zero(); + + for (int r = sg.get_local_id(); r < x.num_rows; r += sg_size) { + val += conj(x.values[r * x.stride + rhs_index]) * + y.values[r * y.stride + rhs_index]; + } - ValueType val = zero(); + val = sycl::reduce_over_group(sg, val, sycl::plus<>()); - for (int r = tid; r < num_rows; r += group_size) { - val += conj(x[r]) * y[r]; + if (sg.get_local_id() == 0) { + result.values[rhs_index] = val; + } } - result = sycl::reduce_over_group(group, val, sycl::plus<>()); } template __dpct_inline__ void compute_norm2_kernel( - const int num_rows, const ValueType* const __restrict__ x, - gko::remove_complex& result, sycl::nd_item<3> item_ct1) + const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::BatchEntry>& + result, + sycl::nd_item<3>& item_ct1) { - const auto group = item_ct1.get_group(); - const auto group_size = item_ct1.get_local_range().size(); - const auto tid = item_ct1.get_local_linear_id(); + const auto sg = item_ct1.get_sub_group(); + const int sg_id = sg.get_group_id(); + const int sg_size = sg.get_local_range().size(); + const int num_sg = sg.get_group_range().size(); using real_type = typename gko::remove_complex; - real_type val = zero(); + for (int rhs_index = sg_id; rhs_index < x.num_rhs; rhs_index += num_sg) { + real_type val = zero(); - for (int r = tid; r < num_rows; r += group_size) { - val += squared_norm(x[r]); - } + for (int r = sg.get_local_id(); r < x.num_rows; r += sg_size) + val += squared_norm(x.values[r * x.stride + rhs_index]); - val = sycl::reduce_over_group(group, val, sycl::plus<>()); + val = sycl::reduce_over_group(sg, val, sycl::plus<>()); - result = sqrt(val); + if (sg.get_local_id() == 0) result.values[rhs_index] = sqrt(val); + } } -/** - * Copies 
the values of vector into another. - * - * @param num_rows Length of vector. - * @param in Vector to copy from. - * @param out Vector to copy into. - */ template -__dpct_inline__ void copy_kernel(const int num_rows, - const ValueType* const __restrict__ in, - ValueType* const __restrict__ out, - sycl::nd_item<3> item_ct1) +__dpct_inline__ void copy_kernel( + const gko::batch_multi_vector::BatchEntry& in, + const gko::batch_multi_vector::BatchEntry& out, + sycl::nd_item<3>& item_ct1) { - for (int iz = item_ct1.get_local_linear_id(); iz < num_rows; + for (int iz = item_ct1.get_local_linear_id(); iz < in.num_rows * in.num_rhs; iz += item_ct1.get_local_range().size()) { - out[iz] = in[iz]; + const int i = iz / in.num_rhs; + const int j = iz % in.num_rhs; + out.values[i * out.stride + j] = in.values[i * in.stride + j]; } } diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index bae0d43f6c0..16f0b528dda 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -34,8 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_DPCPP_BASE_BATCH_STRUCT_HPP_ +#include #include -#include #include "core/base/batch_struct.hpp" diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index fe5fa0ed85f..05ea67bee1d 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -45,16 +46,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_multi_vector_kernels.hpp" #include "core/test/utils.hpp" +#include "core/test/utils/assertions.hpp" #include "core/test/utils/batch_helpers.hpp" #include "test/utils/executor.hpp" class BatchMultiVector : public CommonTestFixture { protected: - using vtype = double; - using Mtx = gko::BatchMultiVector; - using NormVector = gko::BatchMultiVector>; - using ComplexMtx = gko::BatchMultiVector>; + using Mtx = gko::BatchMultiVector; + using NormVector = gko::BatchMultiVector>; + using ComplexMtx = gko::BatchMultiVector>; BatchMultiVector() : rand_engine(15) {} @@ -148,7 +149,7 @@ TEST_F(BatchMultiVector, SingleVectorAddScaledIsEquivalentToRef) x->add_scaled(alpha.get(), y.get()); dx->add_scaled(dalpha.get(), dy.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, r::value); } @@ -159,7 +160,7 @@ TEST_F(BatchMultiVector, MultipleVectorAddScaledIsEquivalentToRef) x->add_scaled(alpha.get(), y.get()); dx->add_scaled(dalpha.get(), dy.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 5 * r::value); } @@ -171,7 +172,7 @@ TEST_F(BatchMultiVector, x->add_scaled(alpha.get(), y.get()); dx->add_scaled(dalpha.get(), dy.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 5 * r::value); } @@ -182,7 +183,7 @@ TEST_F(BatchMultiVector, SingleVectorScaleIsEquivalentToRef) x->scale(alpha.get()); dx->scale(dalpha.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 5 * r::value); } @@ -193,7 +194,7 @@ TEST_F(BatchMultiVector, MultipleVectorScaleIsEquivalentToRef) x->scale(alpha.get()); dx->scale(dalpha.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 5 * r::value); } @@ -204,7 +205,7 @@ TEST_F(BatchMultiVector, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) x->scale(alpha.get()); dx->scale(dalpha.get()); - 
GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dx, x, 5 * r::value); } @@ -219,7 +220,7 @@ TEST_F(BatchMultiVector, ComputeNorm2SingleIsEquivalentToRef) x->compute_norm2(norm_expected.get()); dx->compute_norm2(dnorm.get()); - GKO_ASSERT_BATCH_MTX_NEAR(norm_expected, dnorm, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(norm_expected, dnorm, 5 * r::value); } @@ -234,7 +235,7 @@ TEST_F(BatchMultiVector, ComputeNorm2IsEquivalentToRef) x->compute_norm2(norm_expected.get()); dx->compute_norm2(dnorm.get()); - GKO_ASSERT_BATCH_MTX_NEAR(norm_expected, dnorm, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(norm_expected, dnorm, 5 * r::value); } @@ -249,7 +250,7 @@ TEST_F(BatchMultiVector, ComputeDotIsEquivalentToRef) x->compute_dot(y.get(), dot_expected.get()); dx->compute_dot(dy.get(), ddot.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dot_expected, ddot, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dot_expected, ddot, 5 * r::value); } @@ -264,7 +265,7 @@ TEST_F(BatchMultiVector, ComputeDotSingleIsEquivalentToRef) x->compute_dot(y.get(), dot_expected.get()); dx->compute_dot(dy.get(), ddot.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dot_expected, ddot, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dot_expected, ddot, 5 * r::value); } From 33b726dc77f6a3d77a8b3e963387d21d119175e5 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 21 Jul 2023 10:39:35 +0200 Subject: [PATCH 126/583] Unify CUDA/HIP and enable streams --- ...batch_multi_vector_kernel_launcher.hpp.inc | 117 ++++++++++++++++++ cuda/base/batch_multi_vector_kernels.cu | 84 +------------ hip/base/batch_multi_vector_kernels.hip.cpp | 87 +------------ 3 files changed, 121 insertions(+), 167 deletions(-) create mode 100644 common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc diff --git a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc new file mode 100644 index 00000000000..24cd24d1bf7 --- /dev/null +++ b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc @@ -0,0 +1,117 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + + +template +void scale(std::shared_ptr exec, + const BatchMultiVector* const alpha, + BatchMultiVector* const x) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + scale_kernel<<get_stream()>>>( + alpha_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); + + +template +void add_scaled(std::shared_ptr exec, + const BatchMultiVector* const alpha, + const BatchMultiVector* const x, + BatchMultiVector* const y) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const size_type nrhs = x->get_common_size()[1]; + const auto alpha_ub = get_batch_struct(alpha); + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + add_scaled_kernel<<get_stream()>>>(alpha_ub, x_ub, y_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); + + +template +void compute_dot(std::shared_ptr exec, + const BatchMultiVector* x, + const BatchMultiVector* y, + BatchMultiVector* result) +{ + const auto num_blocks = x->get_num_batch_entries(); + const auto num_rhs = x->get_common_size()[1]; + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + const auto res_ub = get_batch_struct(result); + compute_dot_product_kernel<<get_stream()>>>(x_ub, y_ub, res_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); + + +template +void compute_norm2(std::shared_ptr exec, + const BatchMultiVector* const x, + BatchMultiVector>* const result) +{ + const auto num_blocks = x->get_num_batch_entries(); + const auto num_rhs = x->get_common_size()[1]; + const auto x_ub = get_batch_struct(x); + const auto res_ub = get_batch_struct(result); + compute_norm2_kernel<<get_stream()>>>(x_ub, res_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); + + +template +void copy(std::shared_ptr exec, + const BatchMultiVector* x, + BatchMultiVector* result) +{ + const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto result_ub = get_batch_struct(result); + const auto x_ub = get_batch_struct(x); + copy_kernel<<get_stream()>>>( + x_ub, result_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index 8bfb6fc0167..e7c57111463 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -62,90 +62,10 @@ namespace batch_multi_vector { constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; - +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" - -template -void scale(std::shared_ptr exec, - const BatchMultiVector* 
const alpha, - BatchMultiVector* const x) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto alpha_ub = get_batch_struct(alpha); - const auto x_ub = get_batch_struct(x); - scale_kernel<<>>(alpha_ub, x_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); - - -template -void add_scaled(std::shared_ptr exec, - const BatchMultiVector* const alpha, - const BatchMultiVector* const x, - BatchMultiVector* const y) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const size_type nrhs = x->get_common_size()[1]; - const auto alpha_ub = get_batch_struct(alpha); - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - add_scaled_kernel<<>>(alpha_ub, x_ub, y_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); - - -template -void compute_dot(std::shared_ptr exec, - const BatchMultiVector* x, - const BatchMultiVector* y, - BatchMultiVector* result) -{ - const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_common_size()[1]; - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - const auto res_ub = get_batch_struct(result); - compute_dot_product_kernel<<>>(x_ub, y_ub, - res_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); - - -template -void compute_norm2(std::shared_ptr exec, - const BatchMultiVector* const x, - BatchMultiVector>* const result) -{ - const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_common_size()[1]; - const auto x_ub = get_batch_struct(x); - const auto res_ub = get_batch_struct(result); - compute_norm2_kernel<<>>(x_ub, res_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); - - -template -void copy(std::shared_ptr exec, - const BatchMultiVector* x, - BatchMultiVector* result) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto result_ub = get_batch_struct(result); - const auto x_ub = get_batch_struct(x); - copy_kernel<<>>(x_ub, result_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); +#include "common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc" } // namespace batch_multi_vector diff --git a/hip/base/batch_multi_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp index 50f8593ffec..a8f0f8a7cd6 100644 --- a/hip/base/batch_multi_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -66,93 +66,10 @@ constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" - -template -void scale(std::shared_ptr exec, - const BatchMultiVector* const alpha, - BatchMultiVector* const x) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto alpha_ub = get_batch_struct(alpha); - const auto x_ub = get_batch_struct(x); - hipLaunchKernelGGL(scale_kernel, dim3(num_blocks), dim3(default_block_size), - 0, 0, alpha_ub, x_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); - - -template -void add_scaled(std::shared_ptr exec, - const BatchMultiVector* const alpha, - const BatchMultiVector* const x, - BatchMultiVector* const y) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - 
const size_type nrhs = x->get_common_size()[1]; - const auto alpha_ub = get_batch_struct(alpha); - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - hipLaunchKernelGGL(add_scaled_kernel, dim3(num_blocks), - dim3(default_block_size), 0, 0, alpha_ub, x_ub, y_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); - - -template -void compute_dot(std::shared_ptr exec, - const BatchMultiVector* x, - const BatchMultiVector* y, - BatchMultiVector* result) -{ - const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_common_size()[1]; - const auto x_ub = get_batch_struct(x); - const auto y_ub = get_batch_struct(y); - const auto res_ub = get_batch_struct(result); - hipLaunchKernelGGL(compute_dot_product_kernel, dim3(num_blocks), - dim3(default_block_size), 0, 0, x_ub, y_ub, res_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); - - -template -void compute_norm2(std::shared_ptr exec, - const BatchMultiVector* const x, - BatchMultiVector>* const result) -{ - const auto num_blocks = x->get_num_batch_entries(); - const auto num_rhs = x->get_common_size()[1]; - const auto x_ub = get_batch_struct(x); - const auto res_ub = get_batch_struct(result); - hipLaunchKernelGGL(compute_norm2_kernel, dim3(num_blocks), - dim3(default_block_size), 0, 0, x_ub, res_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); - - -template -void copy(std::shared_ptr exec, - const BatchMultiVector* x, - BatchMultiVector* result) -{ - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; - const auto result_ub = get_batch_struct(result); - const auto x_ub = get_batch_struct(x); - hipLaunchKernelGGL(copy_kernel, dim3(num_blocks), dim3(default_block_size), - 0, 0, x_ub, result_ub); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); +#include "common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc" } // namespace batch_multi_vector From 591f95f6cbe46c29bc248d39d04cd3c830850a07 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 21 Jul 2023 10:54:28 +0200 Subject: [PATCH 127/583] Force the correct include ordering. 
--- cuda/base/batch_multi_vector_kernels.cu | 3 +++ hip/base/batch_multi_vector_kernels.hip.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index e7c57111463..c1246df7374 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -63,7 +63,10 @@ constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES +// force-top: on #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" +// force-top: off + #include "common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc" diff --git a/hip/base/batch_multi_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp index a8f0f8a7cd6..f3acaf9ec36 100644 --- a/hip/base/batch_multi_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -67,7 +67,10 @@ constexpr int sm_multiplier = 4; // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES +// force-top: on #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" +// force-top: off + #include "common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc" From 74037d98edb07e971d0f023527369cbd0294fb7e Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 21 Jul 2023 15:57:14 +0200 Subject: [PATCH 128/583] Review updates Co-authored-by: Marcel Koch Co-authored-by: Tobias Ribizel --- ...batch_multi_vector_kernel_launcher.hpp.inc | 26 +++-- .../base/batch_multi_vector_kernels.hpp.inc | 104 +++++++++--------- core/base/batch_multi_vector.cpp | 36 ++---- core/base/batch_struct.hpp | 35 +++--- cuda/base/batch_multi_vector_kernels.cu | 5 + cuda/base/batch_struct.hpp | 6 +- dpcpp/base/batch_multi_vector_kernels.hpp.inc | 24 ++-- dpcpp/base/batch_struct.hpp | 6 +- hip/base/batch_multi_vector_kernels.hip.cpp | 5 + hip/base/batch_struct.hip.hpp | 6 +- include/ginkgo/core/base/batch_dim.hpp | 44 ++++---- .../ginkgo/core/base/batch_multi_vector.hpp | 29 ++--- .../base/batch_multi_vector_kernels.hpp.inc | 24 ++-- reference/base/batch_struct.hpp | 18 +-- 14 files changed, 184 insertions(+), 184 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc index 24cd24d1bf7..43b0c6d8281 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc @@ -36,11 +36,16 @@ void scale(std::shared_ptr exec, const BatchMultiVector* const alpha, BatchMultiVector* const x) { - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto num_blocks = x->get_num_batch_entries(); const auto alpha_ub = get_batch_struct(alpha); const auto x_ub = get_batch_struct(x); - scale_kernel<<get_stream()>>>( - alpha_ub, x_ub); + if (alpha->get_common_size()[1] == 1) { + scale_kernel<<get_stream()>>>( + alpha_ub, x_ub, [] __device__(int col) { return 0; }); + } else { + scale_kernel<<get_stream()>>>( + alpha_ub, x_ub, [] __device__(int col) { return col; }); + } } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -53,13 +58,20 @@ void add_scaled(std::shared_ptr exec, const BatchMultiVector* const x, BatchMultiVector* const y) { - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto num_blocks = x->get_num_batch_entries(); const size_type nrhs = x->get_common_size()[1]; const auto alpha_ub = get_batch_struct(alpha); const auto x_ub = get_batch_struct(x); const auto y_ub = 
get_batch_struct(y); - add_scaled_kernel<<get_stream()>>>(alpha_ub, x_ub, y_ub); + if (alpha->get_common_size()[1] == 1) { + add_scaled_kernel<<get_stream()>>>( + alpha_ub, x_ub, y_ub, [] __device__(int col) { return 0; }); + } else { + add_scaled_kernel<<get_stream()>>>( + alpha_ub, x_ub, y_ub, [] __device__(int col) { return col; }); + } } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -107,7 +119,7 @@ void copy(std::shared_ptr exec, const BatchMultiVector* x, BatchMultiVector* result) { - const auto num_blocks = exec->get_num_multiprocessor() * sm_multiplier; + const auto num_blocks = x->get_num_batch_entries(); const auto result_ub = get_batch_struct(result); const auto x_ub = get_batch_struct(x); copy_kernel<<get_stream()>>>( diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index fa6270a0b60..6d1161aeaa6 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -35,85 +35,75 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Scales the vectors in global or shared memory with a factor of alpha (alpha * is in global memory or shared memory) */ -template +template __device__ __forceinline__ void scale( - const gko::batch_multi_vector::BatchEntry& alpha, - const gko::batch_multi_vector::BatchEntry& x) + const gko::batch_multi_vector::batch_entry& alpha, + const gko::batch_multi_vector::batch_entry& x, Mapping map) { const int max_li = x.num_rows * x.num_rhs; for (int li = threadIdx.x; li < max_li; li += blockDim.x) { const int row = li / x.num_rhs; const int col = li % x.num_rhs; - if (alpha.num_rhs == 1) { - x.values[row * x.stride + col] = - alpha.values[0] * x.values[row * x.stride + col]; - } else { - x.values[row * x.stride + col] = - alpha.values[col] * x.values[row * x.stride + col]; - } + x.values[row * x.stride + col] = + alpha.values[map(col)] * x.values[row * x.stride + col]; } } -template +template __global__ -__launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( - const gko::batch_multi_vector::UniformBatch alpha, - const gko::batch_multi_vector::UniformBatch x) + __launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( + const gko::batch_multi_vector::uniform_batch alpha, + const gko::batch_multi_vector::uniform_batch x, Mapping map) { for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; ibatch += gridDim.x) { const auto alpha_b = gko::batch::batch_entry(alpha, ibatch); const auto x_b = gko::batch::batch_entry(x, ibatch); - scale(alpha_b, x_b); + scale(alpha_b, x_b, map); } } -template +template __device__ __forceinline__ void add_scaled( - const gko::batch_multi_vector::BatchEntry& alpha, - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& y) + const gko::batch_multi_vector::batch_entry& alpha, + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y, Mapping map) { const int max_li = x.num_rows * x.num_rhs; for (int li = threadIdx.x; li < max_li; li += blockDim.x) { const int row = li / x.num_rhs; const int col = li % x.num_rhs; - if (alpha.num_rhs == 1) { - y.values[row * y.stride + col] += - alpha.values[0] * x.values[row * x.stride + col]; - } else { - y.values[row * y.stride + col] += - alpha.values[col] * x.values[row * x.stride + col]; - } + y.values[row * y.stride + col] += + alpha.values[map(col)] * x.values[row * x.stride + col]; } } -template +template __global__ 
-__launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( - const gko::batch_multi_vector::UniformBatch alpha, - const gko::batch_multi_vector::UniformBatch x, - const gko::batch_multi_vector::UniformBatch y) + __launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( + const gko::batch_multi_vector::uniform_batch alpha, + const gko::batch_multi_vector::uniform_batch x, + const gko::batch_multi_vector::uniform_batch y, Mapping map) { for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; ibatch += gridDim.x) { const auto alpha_b = gko::batch::batch_entry(alpha, ibatch); const auto x_b = gko::batch::batch_entry(x, ibatch); const auto y_b = gko::batch::batch_entry(y, ibatch); - add_scaled(alpha_b, x_b, y_b); + add_scaled(alpha_b, x_b, y_b, map); } } template __device__ __forceinline__ void one_dot( - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& y, + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y, const int rhs_index, - const gko::batch_multi_vector::BatchEntry& result, + const gko::batch_multi_vector::batch_entry& result, group::thread_block_tile& subwarp_grp) { ValueType val = zero(); @@ -143,9 +133,9 @@ __device__ __forceinline__ void one_dot( */ template __device__ __forceinline__ void compute_dot_product( - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& y, - const gko::batch_multi_vector::BatchEntry& result) + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y, + const gko::batch_multi_vector::batch_entry& result) { constexpr auto tile_size = config::warp_size; auto thread_block = group::this_thread_block(); @@ -165,17 +155,17 @@ __global__ __launch_bounds__( default_block_size, sm_multiplier) void compute_dot_product_kernel(const gko:: batch_multi_vector:: - UniformBatch< + uniform_batch< const ValueType> x, const gko:: batch_multi_vector:: - UniformBatch< + uniform_batch< const ValueType> y, const gko:: batch_multi_vector:: - UniformBatch< + uniform_batch< ValueType> result) { @@ -191,9 +181,9 @@ __global__ __launch_bounds__( template __device__ __forceinline__ void one_norm2( - const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::batch_entry& x, const int rhs_index, - const gko::batch_multi_vector::BatchEntry>& + const gko::batch_multi_vector::batch_entry>& result, group::thread_block_tile& subwarp_grp) { @@ -225,8 +215,8 @@ __device__ __forceinline__ void one_norm2( */ template __device__ __forceinline__ void compute_norm2( - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry>& + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry>& result) { constexpr auto tile_size = config::warp_size; @@ -243,11 +233,15 @@ __device__ __forceinline__ void compute_norm2( template -__global__ -__launch_bounds__(default_block_size, sm_multiplier) void compute_norm2_kernel( - const gko::batch_multi_vector::UniformBatch x, - const gko::batch_multi_vector::UniformBatch> - result) +__global__ __launch_bounds__( + default_block_size, + sm_multiplier) void compute_norm2_kernel(const gko::batch_multi_vector:: + uniform_batch + x, + const gko::batch_multi_vector:: + uniform_batch< + remove_complex> + result) { for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; ibatch += gridDim.x) { @@ -266,8 +260,8 @@ __launch_bounds__(default_block_size, sm_multiplier) void 
compute_norm2_kernel( */ template __device__ __forceinline__ void copy( - const gko::batch_multi_vector::BatchEntry& in, - const gko::batch_multi_vector::BatchEntry& out) + const gko::batch_multi_vector::batch_entry& in, + const gko::batch_multi_vector::batch_entry& out) { for (int iz = threadIdx.x; iz < in.num_rows * in.num_rhs; iz += blockDim.x) { @@ -280,9 +274,9 @@ __device__ __forceinline__ void copy( template __global__ -__launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( - const gko::batch_multi_vector::UniformBatch src, - const gko::batch_multi_vector::UniformBatch dst) + __launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( + const gko::batch_multi_vector::uniform_batch src, + const gko::batch_multi_vector::uniform_batch dst) { for (size_type ibatch = blockIdx.x; ibatch < src.num_batch_entries; ibatch += gridDim.x) { diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 0c2f1e0c1ba..d0d76ba5ec6 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -72,12 +72,10 @@ void BatchMultiVector::scale_impl( GKO_ASSERT_EQ(alpha->get_num_batch_entries(), this->get_num_batch_entries()); GKO_ASSERT_EQUAL_ROWS(alpha->get_common_size(), dim<2>(1, 1)); - for (size_type b = 0; b < alpha->get_num_batch_entries(); ++b) { - if (alpha->get_common_size()[1] != 1) { - // different alpha for each column - GKO_ASSERT_EQUAL_COLS(this->get_common_size(), - alpha->get_common_size()); - } + if (alpha->get_common_size()[1] != 1) { + // different alpha for each column + GKO_ASSERT_EQUAL_COLS(this->get_common_size(), + alpha->get_common_size()); } this->get_executor()->run(batch_multi_vector::make_scale(alpha, this)); } @@ -91,12 +89,10 @@ void BatchMultiVector::add_scaled_impl( GKO_ASSERT_EQ(alpha->get_num_batch_entries(), this->get_num_batch_entries()); GKO_ASSERT_EQUAL_ROWS(alpha->get_common_size(), dim<2>(1, 1)); - for (size_type b = 0; b < alpha->get_num_batch_entries(); ++b) { - if (alpha->get_common_size()[1] != 1) { - // different alpha for each column - GKO_ASSERT_EQUAL_COLS(this->get_common_size(), - alpha->get_common_size()); - } + if (alpha->get_common_size()[1] != 1) { + // different alpha for each column + GKO_ASSERT_EQUAL_COLS(this->get_common_size(), + alpha->get_common_size()); } GKO_ASSERT_EQ(b->get_num_batch_entries(), this->get_num_batch_entries()); GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); @@ -162,11 +158,11 @@ void BatchMultiVector::move_to( template -inline void read_impl(MatrixType* mtx, const std::vector& data) +void read_impl(MatrixType* mtx, const std::vector& data) { + GKO_ASSERT(data.size() > 0); auto common_size = data[0].size; auto batch_size = batch_dim<2>(data.size(), common_size); - size_type ind = 0; for (const auto& b : data) { auto b_size = b.size; GKO_ASSERT_EQUAL_DIMENSIONS(common_size, b_size); @@ -208,17 +204,9 @@ void BatchMultiVector::read(const std::vector& data) template -inline void write_impl(const MatrixType* mtx, std::vector& data) +void write_impl(const MatrixType* mtx, std::vector& data) { - std::unique_ptr> - op{}; - const MatrixType* tmp{}; - if (mtx->get_executor()->get_master() != mtx->get_executor()) { - op = mtx->clone(mtx->get_executor()->get_master()); - tmp = static_cast(op.get()); - } else { - tmp = mtx; - } + auto tmp = make_temporary_clone(mtx->get_executor()->get_master(), mtx); data = std::vector(mtx->get_num_batch_entries()); for (size_type b = 0; b < mtx->get_num_batch_entries(); ++b) { diff --git 
a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index 05ac4f0d105..d85c413e691 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -47,7 +47,7 @@ namespace batch_multi_vector { * Encapsulates one matrix from a batch of dense matrices (vectors). */ template -struct BatchEntry { +struct batch_entry { using value_type = ValueType; ValueType* values; size_type stride; @@ -61,9 +61,9 @@ struct BatchEntry { * It is uniform in the sense that all matrices in the batch have common sizes. */ template -struct UniformBatch { +struct uniform_batch { using value_type = ValueType; - using entry_type = BatchEntry; + using entry_type = batch_entry; ValueType* values; size_type num_batch_entries; @@ -85,16 +85,17 @@ namespace batch { template -GKO_ATTRIBUTES GKO_INLINE gko::batch_multi_vector::BatchEntry -to_const(const gko::batch_multi_vector::BatchEntry& b) +GKO_ATTRIBUTES GKO_INLINE gko::batch_multi_vector::batch_entry +to_const(const gko::batch_multi_vector::batch_entry& b) { return {b.values, b.stride, b.num_rows, b.num_rhs}; } template -GKO_ATTRIBUTES GKO_INLINE gko::batch_multi_vector::UniformBatch -to_const(const gko::batch_multi_vector::UniformBatch& ub) +GKO_ATTRIBUTES GKO_INLINE + gko::batch_multi_vector::uniform_batch + to_const(const gko::batch_multi_vector::uniform_batch& ub) { return {ub.values, ub.num_batch_entries, ub.stride, ub.num_rows, ub.num_rhs}; @@ -111,31 +112,23 @@ to_const(const gko::batch_multi_vector::UniformBatch& ub) * @param batch_idx The position of the desired object in the batch */ template -GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::BatchEntry batch_entry( - const batch_multi_vector::UniformBatch& batch, - const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::batch_entry +batch_entry(const batch_multi_vector::uniform_batch& batch, + const size_type batch_idx) { return {batch.values + batch_idx * batch.stride * batch.num_rows, batch.stride, batch.num_rows, batch.num_rhs}; } template -GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::BatchEntry batch_entry( - ValueType* const batch_values, const size_type stride, const int num_rows, - const int num_rhs, const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::batch_entry +batch_entry(ValueType* const batch_values, const size_type stride, + const int num_rows, const int num_rhs, const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, num_rhs}; } -template -GKO_ATTRIBUTES GKO_INLINE ValueType* batch_entry_ptr( - ValueType* const batch_start, const size_type stride, const int num_rows, - const size_type batch_idx) -{ - return batch_start + batch_idx * stride * num_rows; -} - } // namespace batch } // namespace gko diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index c1246df7374..05e08be0adb 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -33,6 +33,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_multi_vector_kernels.hpp" +#include +#include + + #include #include @@ -42,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" #include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/base/thrust.cuh" #include "cuda/components/cooperative_groups.cuh" #include "cuda/components/reduction.cuh" #include "cuda/components/thread_ids.cuh" diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 9084cddfdfa..4358d688f07 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -62,7 +62,7 @@ namespace cuda { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_multi_vector::UniformBatch> +inline gko::batch_multi_vector::uniform_batch> get_batch_struct(const BatchMultiVector* const op) { return {as_cuda_type(op->get_const_values()), op->get_num_batch_entries(), @@ -75,7 +75,7 @@ get_batch_struct(const BatchMultiVector* const op) * Generates a uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_multi_vector::UniformBatch> +inline gko::batch_multi_vector::uniform_batch> get_batch_struct(BatchMultiVector* const op) { return {as_cuda_type(op->get_values()), op->get_num_batch_entries(), @@ -90,7 +90,7 @@ get_batch_struct(BatchMultiVector* const op) * that may be null. */ template -inline gko::batch_multi_vector::UniformBatch> +inline gko::batch_multi_vector::uniform_batch> maybe_null_batch_struct(const BatchMultiVector* const op) { if (op) { diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index d881586e362..75f70cc2781 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -32,8 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __dpct_inline__ void scale_kernel( - const gko::batch_multi_vector::BatchEntry& alpha, - const gko::batch_multi_vector::BatchEntry& x, + const gko::batch_multi_vector::batch_entry& alpha, + const gko::batch_multi_vector::batch_entry& x, sycl::nd_item<3>& item_ct1) { const int max_li = x.num_rows * x.num_rhs; @@ -55,9 +55,9 @@ __dpct_inline__ void scale_kernel( template __dpct_inline__ void add_scaled_kernel( - const gko::batch_multi_vector::BatchEntry& alpha, - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& y, + const gko::batch_multi_vector::batch_entry& alpha, + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y, sycl::nd_item<3>& item_ct1) { const int max_li = x.num_rows * x.num_rhs; @@ -79,9 +79,9 @@ __dpct_inline__ void add_scaled_kernel( template __dpct_inline__ void compute_dot_product_kernel( - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& y, - const gko::batch_multi_vector::BatchEntry& result, + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y, + const gko::batch_multi_vector::batch_entry& result, sycl::nd_item<3>& item_ct1) { const auto sg = item_ct1.get_sub_group(); @@ -108,8 +108,8 @@ __dpct_inline__ void compute_dot_product_kernel( template __dpct_inline__ void compute_norm2_kernel( - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry>& + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry>& result, sycl::nd_item<3>& item_ct1) { @@ -134,8 +134,8 @@ __dpct_inline__ void compute_norm2_kernel( template __dpct_inline__ void copy_kernel( - const gko::batch_multi_vector::BatchEntry& in, - const 
gko::batch_multi_vector::BatchEntry& out, + const gko::batch_multi_vector::batch_entry& in, + const gko::batch_multi_vector::batch_entry& out, sycl::nd_item<3>& item_ct1) { for (int iz = item_ct1.get_local_linear_id(); iz < in.num_rows * in.num_rhs; diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 16f0b528dda..5b88e992665 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -61,7 +61,7 @@ namespace dpcpp { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_multi_vector::UniformBatch get_batch_struct( +inline gko::batch_multi_vector::uniform_batch get_batch_struct( const BatchMultiVector* const op) { return {op->get_const_values(), op->get_num_batch_entries(), @@ -75,7 +75,7 @@ inline gko::batch_multi_vector::UniformBatch get_batch_struct( * Generates a uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_multi_vector::UniformBatch get_batch_struct( +inline gko::batch_multi_vector::uniform_batch get_batch_struct( BatchMultiVector* const op) { return {op->get_values(), op->get_num_batch_entries(), @@ -90,7 +90,7 @@ inline gko::batch_multi_vector::UniformBatch get_batch_struct( * that may be null. */ template -inline gko::batch_multi_vector::UniformBatch +inline gko::batch_multi_vector::uniform_batch maybe_null_batch_struct(const BatchMultiVector* const op) { if (op) { diff --git a/hip/base/batch_multi_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp index f3acaf9ec36..c1e7469ef9e 100644 --- a/hip/base/batch_multi_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -36,6 +36,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include +#include + + #include #include @@ -45,6 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hip/base/config.hip.hpp" #include "hip/base/hipblas_bindings.hip.hpp" #include "hip/base/pointer_mode_guard.hip.hpp" +#include "hip/base/thrust.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" #include "hip/components/reduction.hip.hpp" #include "hip/components/thread_ids.hip.hpp" diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index f76e4fa8a79..f8788b9e6a8 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -62,7 +62,7 @@ namespace hip { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_multi_vector::UniformBatch> +inline gko::batch_multi_vector::uniform_batch> get_batch_struct(const BatchMultiVector* const op) { return {as_hip_type(op->get_const_values()), op->get_num_batch_entries(), @@ -75,7 +75,7 @@ get_batch_struct(const BatchMultiVector* const op) * Generates a uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_multi_vector::UniformBatch> +inline gko::batch_multi_vector::uniform_batch> get_batch_struct(BatchMultiVector* const op) { return {as_hip_type(op->get_values()), op->get_num_batch_entries(), @@ -90,7 +90,7 @@ get_batch_struct(BatchMultiVector* const op) * that may be null. 
*/ template -inline gko::batch_multi_vector::UniformBatch> +inline gko::batch_multi_vector::uniform_batch> maybe_null_batch_struct(const BatchMultiVector* const op) { if (op) { diff --git a/include/ginkgo/core/base/batch_dim.hpp b/include/ginkgo/core/base/batch_dim.hpp index bc17648be52..37ce5993220 100644 --- a/include/ginkgo/core/base/batch_dim.hpp +++ b/include/ginkgo/core/base/batch_dim.hpp @@ -77,13 +77,13 @@ struct batch_dim { /** * Get the cumulative storage size offset * - * @param b the batch id + * @param batch_id the batch id * * @return the cumulative offset */ - size_type get_cumulative_offset(size_type b) const + size_type get_cumulative_offset(size_type batch_id) const { - return b * common_size_[0] * common_size_[1]; + return batch_id * common_size_[0] * common_size_[1]; } /** @@ -100,6 +100,25 @@ struct batch_dim { x.common_size_ == y.common_size_; } + + /** + * Checks if two batch dim objects are different. + * + * @tparam Dimensionality number of dimensions of the dim objects + * @tparam DimensionType datatype used to represent each dimension + * + * @param x first object + * @param y second object + * + * @return `!(x == y)` + */ + friend bool operator!=(const batch_dim& x, + const batch_dim& y) + { + return !(x == y); + } + + /** * Creates a batch_dim object which stores a uniform size for all batch * entries. @@ -121,25 +140,6 @@ struct batch_dim { }; -/** - * Checks if two batch dim objects are different. - * - * @tparam Dimensionality number of dimensions of the dim objects - * @tparam DimensionType datatype used to represent each dimension - * - * @param x first object - * @param y second object - * - * @return `!(x == y)` - */ -template -inline bool operator!=(const batch_dim& x, - const batch_dim& y) -{ - return !(x == y); -} - - /** * Returns a batch_dim object with its dimensions swapped for batched operators * diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 47dbe6078f5..4ce88acc621 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -522,7 +522,7 @@ class BatchMultiVector * @returns a BatchMultiVector matrix with the same configuration as the * caller. */ - virtual std::unique_ptr create_with_same_config() const + std::unique_ptr create_with_same_config() const { return BatchMultiVector::create(this->get_executor(), this->get_size()); } @@ -533,7 +533,7 @@ class BatchMultiVector * @note Other implementations of batch_multi_vector should override this * function instead of scale(const BatchMultiVector *alpha). */ - virtual void scale_impl(const BatchMultiVector* alpha); + void scale_impl(const BatchMultiVector* alpha); /** * @copydoc add_scaled(const BatchMultiVector *, const BatchMultiVector *) @@ -542,8 +542,8 @@ class BatchMultiVector * function instead of add_scale(const BatchMultiVector *alpha, const * BatchMultiVector *b). */ - virtual void add_scaled_impl(const BatchMultiVector* alpha, - const BatchMultiVector* b); + void add_scaled_impl(const BatchMultiVector* alpha, + const BatchMultiVector* b); /** * @copydoc compute_dot(const BatchMultiVector *, BatchMultiVector *) const @@ -552,8 +552,8 @@ class BatchMultiVector * function instead of compute_dot(const BatchMultiVector *b, * BatchMultiVector *result). 
*/ - virtual void compute_dot_impl(const BatchMultiVector* b, - BatchMultiVector* result) const; + void compute_dot_impl(const BatchMultiVector* b, + BatchMultiVector* result) const; /** * @copydoc compute_norm2(BatchMultiVector *) const @@ -561,7 +561,7 @@ class BatchMultiVector * @note Other implementations of batch_multi_vector should override this * function instead of compute_norm2(BatchMultiVector *result). */ - virtual void compute_norm2_impl( + void compute_norm2_impl( BatchMultiVector>* result) const; size_type linearize_index(size_type batch, size_type row, @@ -611,12 +611,12 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = vals.size(); + GKO_ASSERT(num_batch_entries > 0); auto vals_begin = begin(vals); size_type common_num_rows = vals_begin->size(); auto common_size = dim<2>(common_num_rows, 1); - for (size_type b = 0; b < num_batch_entries; ++b) { - GKO_ASSERT_EQ(common_num_rows, vals_begin->size()); - vals_begin++; + for (auto& val : vals) { + GKO_ASSERT_EQ(common_num_rows, val.size()); } auto b_size = batch_dim<2>(num_batch_entries, common_size); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); @@ -664,6 +664,7 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = vals.size(); + GKO_ASSERT(num_batch_entries > 0); auto vals_begin = begin(vals); size_type common_num_rows = vals_begin->size(); size_type common_num_cols = vals_begin->begin()->size(); @@ -728,6 +729,7 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = num_vectors; + GKO_ASSERT(num_batch_entries > 0); auto b_size = batch_dim<2>(num_batch_entries, dim<2>(vals.size(), 1)); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); for (size_type batch = 0; batch < num_vectors; batch++) { @@ -768,16 +770,17 @@ std::unique_ptr batch_initialize( */ template std::unique_ptr batch_initialize( - const size_type num_matrices, + const size_type num_batch_entries, std::initializer_list> vals, std::shared_ptr exec, TArgs&&... create_args) { using batch_multi_vector = BatchMultiVector; + GKO_ASSERT(num_batch_entries > 0); auto common_size = dim<2>(vals.size(), begin(vals)->size()); - batch_dim<2> b_size(num_matrices, common_size); + batch_dim<2> b_size(num_batch_entries, common_size); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - for (size_type batch = 0; batch < num_matrices; batch++) { + for (size_type batch = 0; batch < num_batch_entries; batch++) { size_type ridx = 0; for (const auto& row : vals) { size_type cidx = 0; diff --git a/reference/base/batch_multi_vector_kernels.hpp.inc b/reference/base/batch_multi_vector_kernels.hpp.inc index a80415572c2..599013179ce 100644 --- a/reference/base/batch_multi_vector_kernels.hpp.inc +++ b/reference/base/batch_multi_vector_kernels.hpp.inc @@ -32,8 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template inline void scale_kernel( - const gko::batch_multi_vector::BatchEntry& alpha, - const gko::batch_multi_vector::BatchEntry& x) + const gko::batch_multi_vector::batch_entry& alpha, + const gko::batch_multi_vector::batch_entry& x) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -53,9 +53,9 @@ inline void scale_kernel( template inline void add_scaled_kernel( - const gko::batch_multi_vector::BatchEntry& alpha, - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& y) + const gko::batch_multi_vector::batch_entry& alpha, + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -77,9 +77,9 @@ inline void add_scaled_kernel( template inline void compute_dot_product_kernel( - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry& y, - const gko::batch_multi_vector::BatchEntry& result) + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y, + const gko::batch_multi_vector::batch_entry& result) { for (int c = 0; c < result.num_rhs; c++) { result.values[c] = gko::zero(); @@ -96,8 +96,8 @@ inline void compute_dot_product_kernel( template inline void compute_norm2_kernel( - const gko::batch_multi_vector::BatchEntry& x, - const gko::batch_multi_vector::BatchEntry>& + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry>& result) { for (int j = 0; j < x.num_rhs; ++j) { @@ -122,8 +122,8 @@ inline void compute_norm2_kernel( */ template inline void copy_kernel( - const gko::batch_multi_vector::BatchEntry& in, - const gko::batch_multi_vector::BatchEntry& out) + const gko::batch_multi_vector::batch_entry& in, + const gko::batch_multi_vector::batch_entry& out) { for (int iz = 0; iz < in.num_rows * in.num_rhs; iz++) { const int i = iz / in.num_rhs; diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index fec5b4f8803..cec3a4ed813 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -30,15 +30,15 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_REFERENCE_BASE_BATCH_STRUCT_HPP_ -#define GKO_REFERENCE_BASE_BATCH_STRUCT_HPP_ +#ifndef GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ -#include -#include +#include "core/base/batch_struct.hpp" -#include "core/base/batch_struct.hpp" +#include +#include namespace gko { @@ -63,7 +63,7 @@ namespace host { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_multi_vector::UniformBatch get_batch_struct( +inline gko::batch_multi_vector::uniform_batch get_batch_struct( const BatchMultiVector* const op) { return {op->get_const_values(), op->get_num_batch_entries(), @@ -77,7 +77,7 @@ inline gko::batch_multi_vector::UniformBatch get_batch_struct( * Generates a uniform batch struct from a batch of dense matrices. */ template -inline gko::batch_multi_vector::UniformBatch get_batch_struct( +inline gko::batch_multi_vector::uniform_batch get_batch_struct( BatchMultiVector* const op) { return {op->get_values(), op->get_num_batch_entries(), @@ -92,7 +92,7 @@ inline gko::batch_multi_vector::UniformBatch get_batch_struct( * that may be null. 
*/ template -inline gko::batch_multi_vector::UniformBatch +inline gko::batch_multi_vector::uniform_batch maybe_null_batch_struct(const BatchMultiVector* const op) { if (op) { @@ -111,4 +111,4 @@ maybe_null_batch_struct(const BatchMultiVector* const op) } // namespace gko -#endif // GKO_REFERENCE_BASE_BATCH_STRUCT_HPP_ +#endif // GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ From d55865ca822002c05900307796b538b1f95820e9 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sat, 22 Jul 2023 23:58:37 +0200 Subject: [PATCH 129/583] Add compute_conj_dot and kernels --- ...batch_multi_vector_kernel_launcher.hpp.inc | 19 +++++ .../base/batch_multi_vector_kernels.hpp.inc | 81 +++++++++++++++++-- core/base/batch_multi_vector.cpp | 18 +++++ core/base/batch_multi_vector_kernels.hpp | 28 ++++--- core/device_hooks/common_kernels.inc.cpp | 1 + dpcpp/base/batch_multi_vector_kernels.dp.cpp | 35 ++++++++ dpcpp/base/batch_multi_vector_kernels.hpp.inc | 29 +++++++ .../ginkgo/core/base/batch_multi_vector.hpp | 42 ++++++---- omp/base/batch_multi_vector_kernels.cpp | 23 ++++++ reference/base/batch_multi_vector_kernels.cpp | 22 +++++ .../base/batch_multi_vector_kernels.hpp.inc | 19 +++++ .../test/base/batch_multi_vector_kernels.cpp | 50 +++++++++++- test/base/batch_multi_vector_kernels.cpp | 30 +++++++ 13 files changed, 364 insertions(+), 33 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc index 43b0c6d8281..b797850059b 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc @@ -97,6 +97,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); +template +void compute_conj_dot(std::shared_ptr exec, + const BatchMultiVector* x, + const BatchMultiVector* y, + BatchMultiVector* result) +{ + const auto num_blocks = x->get_num_batch_entries(); + const auto num_rhs = x->get_common_size()[1]; + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + const auto res_ub = get_batch_struct(result); + compute_conj_dot_product_kernel<<get_stream()>>>(x_ub, y_ub, res_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_CONJ_DOT_KERNEL); + + template void compute_norm2(std::shared_ptr exec, const BatchMultiVector* const x, diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index 6d1161aeaa6..28ea60c7df4 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -108,6 +108,34 @@ __device__ __forceinline__ void one_dot( { ValueType val = zero(); + for (int r = subwarp_grp.thread_rank(); r < x.num_rows; + r += subwarp_grp.size()) { + val += x.values[r * x.stride + rhs_index] * + y.values[r * y.stride + rhs_index]; + } + + // subwarp_grp level reduction +#pragma unroll + for (int j = config::warp_size / 2; j > 0; j /= 2) { + val += subwarp_grp.shfl_down(val, j); + } + + if (subwarp_grp.thread_rank() == 0) { + result.values[rhs_index] = val; + } +} + + +template +__device__ __forceinline__ void one_conj_dot( + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y, + const int rhs_index, + const gko::batch_multi_vector::batch_entry& result, + group::thread_block_tile& subwarp_grp) +{ + ValueType val = zero(); + for (int r = subwarp_grp.thread_rank(); r < 
x.num_rows; r += subwarp_grp.size()) { val += conj(x.values[r * x.stride + rhs_index]) * @@ -126,11 +154,6 @@ __device__ __forceinline__ void one_dot( } -/** - * Computes the dot product of some column vectors in global or shared memory. - * - * @param result Holds dot product value for vector in x and y. - */ template __device__ __forceinline__ void compute_dot_product( const gko::batch_multi_vector::batch_entry& x, @@ -150,6 +173,25 @@ __device__ __forceinline__ void compute_dot_product( } +template +__device__ __forceinline__ void compute_conj_dot_product( + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y, + const gko::batch_multi_vector::batch_entry& result) +{ + constexpr auto tile_size = config::warp_size; + auto thread_block = group::this_thread_block(); + auto subwarp_grp = group::tiled_partition(thread_block); + const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); + const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + + for (int rhs_index = subwarp_grp_id; rhs_index < x.num_rhs; + rhs_index += num_subwarp_grps_per_block) { + one_conj_dot(x, y, rhs_index, result, subwarp_grp); + } +} + + template __global__ __launch_bounds__( default_block_size, @@ -179,6 +221,35 @@ __global__ __launch_bounds__( } +template +__global__ __launch_bounds__( + default_block_size, + sm_multiplier) void compute_conj_dot_product_kernel(const gko:: + batch_multi_vector:: + uniform_batch< + const ValueType> + x, + const gko:: + batch_multi_vector:: + uniform_batch< + const ValueType> + y, + const gko:: + batch_multi_vector:: + uniform_batch< + ValueType> + result) +{ + for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; + ibatch += gridDim.x) { + const auto x_b = gko::batch::batch_entry(x, ibatch); + const auto y_b = gko::batch::batch_entry(y, ibatch); + const auto r_b = gko::batch::batch_entry(result, ibatch); + compute_conj_dot_product(x_b, y_b, r_b); + } +} + + template __device__ __forceinline__ void one_norm2( const gko::batch_multi_vector::batch_entry& x, diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index d0d76ba5ec6..3578d0678de 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -57,6 +57,7 @@ namespace { GKO_REGISTER_OPERATION(scale, batch_multi_vector::scale); GKO_REGISTER_OPERATION(add_scaled, batch_multi_vector::add_scaled); GKO_REGISTER_OPERATION(compute_dot, batch_multi_vector::compute_dot); +GKO_REGISTER_OPERATION(compute_conj_dot, batch_multi_vector::compute_conj_dot); GKO_REGISTER_OPERATION(compute_norm2, batch_multi_vector::compute_norm2); GKO_REGISTER_OPERATION(copy, batch_multi_vector::copy); @@ -109,6 +110,23 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) } +template +void BatchMultiVector::compute_conj_dot_impl( + const BatchMultiVector* b, + BatchMultiVector* result) const +{ + GKO_ASSERT_EQ(b->get_num_batch_entries(), this->get_num_batch_entries()); + GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQ(this->get_num_batch_entries(), + result->get_num_batch_entries()); + GKO_ASSERT_EQUAL_DIMENSIONS( + result->get_common_size(), + get_col_sizes(this->get_size()).get_common_size()); + this->get_executor()->run( + batch_multi_vector::make_compute_conj_dot(this, b, result)); +} + + template void BatchMultiVector::compute_dot_impl( const BatchMultiVector* b, diff --git a/core/base/batch_multi_vector_kernels.hpp b/core/base/batch_multi_vector_kernels.hpp index 
28c7b87de10..6eba9eac829 100644 --- a/core/base/batch_multi_vector_kernels.hpp +++ b/core/base/batch_multi_vector_kernels.hpp @@ -66,6 +66,12 @@ namespace kernels { const BatchMultiVector<_type>* y, \ BatchMultiVector<_type>* result) +#define GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_CONJ_DOT_KERNEL(_type) \ + void compute_conj_dot(std::shared_ptr exec, \ + const BatchMultiVector<_type>* x, \ + const BatchMultiVector<_type>* y, \ + BatchMultiVector<_type>* result) + #define GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL(_type) \ void compute_norm2(std::shared_ptr exec, \ const BatchMultiVector<_type>* x, \ @@ -77,16 +83,18 @@ namespace kernels { BatchMultiVector<_type>* result) -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL(ValueType); \ - template \ - GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL(ValueType); \ - template \ +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_CONJ_DOT_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL(ValueType); \ + template \ GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL(ValueType) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 9ab79160394..0f898b3ae73 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -279,6 +279,7 @@ namespace batch_multi_vector { GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_CONJ_DOT_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 74c3b842297..97f7469a6f6 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -168,6 +168,41 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); +template +void compute_conj_dot(std::shared_ptr exec, + const BatchMultiVector* const x, + const BatchMultiVector* const y, + BatchMultiVector* const result) +{ + const auto x_ub = get_batch_struct(x); + const auto y_ub = get_batch_struct(y); + const auto res_ub = get_batch_struct(result); + + const auto num_batches = x_ub.num_batch_entries; + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batches); + + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::batch_entry(x_ub, group_id); + const auto y_b = batch::batch_entry(y_ub, group_id); + const auto res_b = batch::batch_entry(res_ub, group_id); + compute_conj_dot_product_kernel(x_b, y_b, 
res_b, item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_CONJ_DOT_KERNEL); + + template void compute_norm2(std::shared_ptr exec, const BatchMultiVector* const x, diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index 75f70cc2781..cb2ccd4ae50 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -89,6 +89,35 @@ __dpct_inline__ void compute_dot_product_kernel( const int sg_size = sg.get_local_range().size(); const int num_sg = sg.get_group_range().size(); + for (int rhs_index = sg_id; rhs_index < x.num_rhs; rhs_index += num_sg) { + ValueType val = zero(); + + for (int r = sg.get_local_id(); r < x.num_rows; r += sg_size) { + val += x.values[r * x.stride + rhs_index] * + y.values[r * y.stride + rhs_index]; + } + + val = sycl::reduce_over_group(sg, val, sycl::plus<>()); + + if (sg.get_local_id() == 0) { + result.values[rhs_index] = val; + } + } +} + + +template +__dpct_inline__ void compute_conj_dot_product_kernel( + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y, + const gko::batch_multi_vector::batch_entry& result, + sycl::nd_item<3>& item_ct1) +{ + const auto sg = item_ct1.get_sub_group(); + const int sg_id = sg.get_group_id(); + const int sg_size = sg.get_local_range().size(); + const int num_sg = sg.get_group_range().size(); + for (int rhs_index = sg_id; rhs_index < x.num_rhs; rhs_index += num_sg) { ValueType val = zero(); diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 4ce88acc621..34ca15db8c0 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -334,8 +334,7 @@ class BatchMultiVector /** * Computes the column-wise dot product of each matrix in this batch and its - * corresponding entry in `b`. If the vector has complex value_type, then - * the conjugate of this is taken. + * corresponding entry in `b`. * * @param b a BatchMultiVector matrix of same dimension as this * @param result a BatchMultiVector row vector, used to store the dot @@ -350,6 +349,24 @@ class BatchMultiVector make_temporary_clone(exec, result).get()); } + /** + * Computes the column-wise conjugate dot product of each matrix in this + * batch and its corresponding entry in `b`. If the vector has complex + * value_type, then the conjugate of this is taken. + * + * @param b a BatchMultiVector matrix of same dimension as this + * @param result a BatchMultiVector row vector, used to store the dot + * product (the number of column in the vector must match the number of + * columns of this) + */ + void compute_conj_dot(ptr_param> b, + ptr_param> result) const + { + auto exec = this->get_executor(); + this->compute_conj_dot_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, result).get()); + } + /** * Computes the Euclidean (L^2) norm of each matrix in this batch. * @@ -529,37 +546,30 @@ class BatchMultiVector /** * @copydoc scale(const BatchMultiVector *) - * - * @note Other implementations of batch_multi_vector should override this - * function instead of scale(const BatchMultiVector *alpha). 
*/ void scale_impl(const BatchMultiVector* alpha); /** * @copydoc add_scaled(const BatchMultiVector *, const BatchMultiVector *) - * - * @note Other implementations of batch_multi_vector should override this - * function instead of add_scale(const BatchMultiVector *alpha, const - * BatchMultiVector *b). */ void add_scaled_impl(const BatchMultiVector* alpha, const BatchMultiVector* b); /** * @copydoc compute_dot(const BatchMultiVector *, BatchMultiVector *) const - * - * @note Other implementations of batch_multi_vector should override this - * function instead of compute_dot(const BatchMultiVector *b, - * BatchMultiVector *result). */ void compute_dot_impl(const BatchMultiVector* b, BatchMultiVector* result) const; + /** + * @copydoc compute_conj_dot(const BatchMultiVector *, BatchMultiVector *) + * const + */ + void compute_conj_dot_impl(const BatchMultiVector* b, + BatchMultiVector* result) const; + /** * @copydoc compute_norm2(BatchMultiVector *) const - * - * @note Other implementations of batch_multi_vector should override this - * function instead of compute_norm2(BatchMultiVector *result). */ void compute_norm2_impl( BatchMultiVector>* result) const; diff --git a/omp/base/batch_multi_vector_kernels.cpp b/omp/base/batch_multi_vector_kernels.cpp index f46cbb12ead..a88443f60b9 100644 --- a/omp/base/batch_multi_vector_kernels.cpp +++ b/omp/base/batch_multi_vector_kernels.cpp @@ -123,6 +123,29 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); +template +void compute_conj_dot(std::shared_ptr exec, + const BatchMultiVector* const x, + const BatchMultiVector* const y, + BatchMultiVector* const result) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto res_ub = host::get_batch_struct(result); +#pragma omp parallel for + for (size_type batch = 0; batch < result->get_num_batch_entries(); + ++batch) { + const auto res_b = gko::batch::batch_entry(res_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + compute_conj_dot_product_kernel(x_b, y_b, res_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_CONJ_DOT_KERNEL); + + template void compute_norm2(std::shared_ptr exec, const BatchMultiVector* const x, diff --git a/reference/base/batch_multi_vector_kernels.cpp b/reference/base/batch_multi_vector_kernels.cpp index f494a326773..967dddb108a 100644 --- a/reference/base/batch_multi_vector_kernels.cpp +++ b/reference/base/batch_multi_vector_kernels.cpp @@ -120,6 +120,28 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL); +template +void compute_conj_dot(std::shared_ptr exec, + const BatchMultiVector* x, + const BatchMultiVector* y, + BatchMultiVector* result) +{ + const auto x_ub = host::get_batch_struct(x); + const auto y_ub = host::get_batch_struct(y); + const auto res_ub = host::get_batch_struct(result); + for (size_type batch = 0; batch < result->get_num_batch_entries(); + ++batch) { + const auto res_b = gko::batch::batch_entry(res_ub, batch); + const auto x_b = gko::batch::batch_entry(x_ub, batch); + const auto y_b = gko::batch::batch_entry(y_ub, batch); + compute_conj_dot_product_kernel(x_b, y_b, res_b); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_CONJ_DOT_KERNEL); + + template void compute_norm2(std::shared_ptr exec, const BatchMultiVector* x, diff --git 
a/reference/base/batch_multi_vector_kernels.hpp.inc b/reference/base/batch_multi_vector_kernels.hpp.inc index 599013179ce..6e3b195e175 100644 --- a/reference/base/batch_multi_vector_kernels.hpp.inc +++ b/reference/base/batch_multi_vector_kernels.hpp.inc @@ -85,6 +85,25 @@ inline void compute_dot_product_kernel( result.values[c] = gko::zero(); } + for (int r = 0; r < x.num_rows; r++) { + for (int c = 0; c < x.num_rhs; c++) { + result.values[c] += + x.values[r * x.stride + c] * y.values[r * y.stride + c]; + } + } +} + + +template +inline void compute_conj_dot_product_kernel( + const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_entry& y, + const gko::batch_multi_vector::batch_entry& result) +{ + for (int c = 0; c < result.num_rhs; c++) { + result.values[c] = gko::zero(); + } + for (int r = 0; r < x.num_rows; r++) { for (int c = 0; c < x.num_rhs; c++) { result.values[c] += diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index c7ba4a0bcf2..445cdedb73f 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -254,7 +254,7 @@ TYPED_TEST(BatchMultiVector, ComputesDot) } -TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongInputSize) +TYPED_TEST(BatchMultiVector, ComputeDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; auto result = @@ -265,7 +265,7 @@ TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongInputSize) } -TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongResultSize) +TYPED_TEST(BatchMultiVector, ComputeDotFailsOnWrongResultSize) { using Mtx = typename TestFixture::Mtx; auto result = @@ -280,6 +280,52 @@ TYPED_TEST(BatchMultiVector, ComputDotFailsOnWrongResultSize) } +TYPED_TEST(BatchMultiVector, ComputesConjDot) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto result = + Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); + + auto ures = result->unbatch(); + + this->mtx_0->compute_conj_dot(this->mtx_1.get(), result.get()); + this->mtx_00->compute_conj_dot(this->mtx_10.get(), ures[0].get()); + this->mtx_01->compute_conj_dot(this->mtx_11.get(), ures[1].get()); + + auto res = result->unbatch(); + GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); +} + + +TYPED_TEST(BatchMultiVector, ComputeConjDotFailsOnWrongInputSize) +{ + using Mtx = typename TestFixture::Mtx; + auto result = + Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); + + ASSERT_THROW(this->mtx_1->compute_conj_dot(this->mtx_2.get(), result.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchMultiVector, ComputeConjDotFailsOnWrongResultSize) +{ + using Mtx = typename TestFixture::Mtx; + auto result = + Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); + auto result2 = + Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); + + ASSERT_THROW(this->mtx_0->compute_conj_dot(this->mtx_1.get(), result.get()), + gko::DimensionMismatch); + ASSERT_THROW( + this->mtx_0->compute_conj_dot(this->mtx_1.get(), result2.get()), + gko::DimensionMismatch); +} + + TYPED_TEST(BatchMultiVector, ComputesNorm2) { using Mtx = typename TestFixture::Mtx; diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index 05ea67bee1d..631b9a10c24 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -269,6 +269,36 @@ 
TEST_F(BatchMultiVector, ComputeDotSingleIsEquivalentToRef) } +TEST_F(BatchMultiVector, ComputeConjDotIsEquivalentToRef) +{ + set_up_vector_data(20); + auto dot_size = + gko::batch_dim<2>(batch_size, gko::dim<2>{1, x->get_common_size()[1]}); + auto dot_expected = Mtx::create(this->ref, dot_size); + auto ddot = Mtx::create(this->exec, dot_size); + + x->compute_conj_dot(y.get(), dot_expected.get()); + dx->compute_conj_dot(dy.get(), ddot.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dot_expected, ddot, 5 * r::value); +} + + +TEST_F(BatchMultiVector, ComputeConjDotSingleIsEquivalentToRef) +{ + set_up_vector_data(1); + auto dot_size = + gko::batch_dim<2>(batch_size, gko::dim<2>{1, x->get_common_size()[1]}); + auto dot_expected = Mtx::create(this->ref, dot_size); + auto ddot = Mtx::create(this->exec, dot_size); + + x->compute_conj_dot(y.get(), dot_expected.get()); + dx->compute_conj_dot(dy.get(), ddot.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dot_expected, ddot, 5 * r::value); +} + + TEST_F(BatchMultiVector, CopySingleIsEquivalentToRef) { set_up_vector_data(1); From 4aa2d8b19839fc97772e6004d478ebd1d79be928 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 24 Jul 2023 09:35:57 +0200 Subject: [PATCH 130/583] Generalize CUDA/HIP kernels and use reduce prim --- ...batch_multi_vector_kernel_launcher.hpp.inc | 10 +- .../base/batch_multi_vector_kernels.hpp.inc | 220 ++++++------------ 2 files changed, 75 insertions(+), 155 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc index b797850059b..60af1de45af 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc @@ -89,8 +89,9 @@ void compute_dot(std::shared_ptr exec, const auto x_ub = get_batch_struct(x); const auto y_ub = get_batch_struct(y); const auto res_ub = get_batch_struct(result); - compute_dot_product_kernel<<get_stream()>>>(x_ub, y_ub, res_ub); + compute_gen_dot_product_kernel<<get_stream()>>>( + x_ub, y_ub, res_ub, [] __device__(auto val) { return val; }); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -108,8 +109,9 @@ void compute_conj_dot(std::shared_ptr exec, const auto x_ub = get_batch_struct(x); const auto y_ub = get_batch_struct(y); const auto res_ub = get_batch_struct(result); - compute_conj_dot_product_kernel<<get_stream()>>>(x_ub, y_ub, res_ub); + compute_gen_dot_product_kernel<<get_stream()>>>( + x_ub, y_ub, res_ub, [] __device__(auto val) { return conj(val); }); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index 28ea60c7df4..18c4f48811b 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -56,10 +56,10 @@ __global__ const gko::batch_multi_vector::uniform_batch alpha, const gko::batch_multi_vector::uniform_batch x, Mapping map) { - for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; - ibatch += gridDim.x) { - const auto alpha_b = gko::batch::batch_entry(alpha, ibatch); - const auto x_b = gko::batch::batch_entry(x, ibatch); + for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; + batch_id += gridDim.x) { + const auto alpha_b = gko::batch::batch_entry(alpha, batch_id); + const auto x_b = gko::batch::batch_entry(x, batch_id); scale(alpha_b, x_b, map); } } @@ -88,191 +88,109 @@ __global__ const 
gko::batch_multi_vector::uniform_batch x, const gko::batch_multi_vector::uniform_batch y, Mapping map) { - for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; - ibatch += gridDim.x) { - const auto alpha_b = gko::batch::batch_entry(alpha, ibatch); - const auto x_b = gko::batch::batch_entry(x, ibatch); - const auto y_b = gko::batch::batch_entry(y, ibatch); + for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; + batch_id += gridDim.x) { + const auto alpha_b = gko::batch::batch_entry(alpha, batch_id); + const auto x_b = gko::batch::batch_entry(x, batch_id); + const auto y_b = gko::batch::batch_entry(y, batch_id); add_scaled(alpha_b, x_b, y_b, map); } } -template -__device__ __forceinline__ void one_dot( +template +__device__ __forceinline__ void gen_one_dot( const gko::batch_multi_vector::batch_entry& x, const gko::batch_multi_vector::batch_entry& y, const int rhs_index, const gko::batch_multi_vector::batch_entry& result, - group::thread_block_tile& subwarp_grp) + Group subgroup, Mapping conj_map) { ValueType val = zero(); - for (int r = subwarp_grp.thread_rank(); r < x.num_rows; - r += subwarp_grp.size()) { - val += x.values[r * x.stride + rhs_index] * + for (int r = subgroup.thread_rank(); r < x.num_rows; r += subgroup.size()) { + val += conj_map(x.values[r * x.stride + rhs_index]) * y.values[r * y.stride + rhs_index]; } - // subwarp_grp level reduction -#pragma unroll - for (int j = config::warp_size / 2; j > 0; j /= 2) { - val += subwarp_grp.shfl_down(val, j); - } + // subgroup level reduction + val = reduce(subgroup, val, thrust::plus{}); - if (subwarp_grp.thread_rank() == 0) { + if (subgroup.thread_rank() == 0) { result.values[rhs_index] = val; } } -template -__device__ __forceinline__ void one_conj_dot( +template +__device__ __forceinline__ void compute_gen_dot_product( const gko::batch_multi_vector::batch_entry& x, const gko::batch_multi_vector::batch_entry& y, - const int rhs_index, const gko::batch_multi_vector::batch_entry& result, - group::thread_block_tile& subwarp_grp) -{ - ValueType val = zero(); - - for (int r = subwarp_grp.thread_rank(); r < x.num_rows; - r += subwarp_grp.size()) { - val += conj(x.values[r * x.stride + rhs_index]) * - y.values[r * y.stride + rhs_index]; - } - - // subwarp_grp level reduction -#pragma unroll - for (int j = config::warp_size / 2; j > 0; j /= 2) { - val += subwarp_grp.shfl_down(val, j); - } - - if (subwarp_grp.thread_rank() == 0) { - result.values[rhs_index] = val; - } -} - - -template -__device__ __forceinline__ void compute_dot_product( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y, - const gko::batch_multi_vector::batch_entry& result) -{ - constexpr auto tile_size = config::warp_size; - auto thread_block = group::this_thread_block(); - auto subwarp_grp = group::tiled_partition(thread_block); - const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); - const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); - - for (int rhs_index = subwarp_grp_id; rhs_index < x.num_rhs; - rhs_index += num_subwarp_grps_per_block) { - one_dot(x, y, rhs_index, result, subwarp_grp); - } -} - - -template -__device__ __forceinline__ void compute_conj_dot_product( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y, - const gko::batch_multi_vector::batch_entry& result) + Mapping conj_map) { constexpr auto tile_size = config::warp_size; auto thread_block = group::this_thread_block(); - auto subwarp_grp = 
group::tiled_partition(thread_block); - const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); - const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); - - for (int rhs_index = subwarp_grp_id; rhs_index < x.num_rhs; - rhs_index += num_subwarp_grps_per_block) { - one_conj_dot(x, y, rhs_index, result, subwarp_grp); - } -} - + auto subgroup = group::tiled_partition(thread_block); + const auto subgroup_id = static_cast(threadIdx.x / tile_size); + const int num_subgroups_per_block = ceildiv(blockDim.x, tile_size); -template -__global__ __launch_bounds__( - default_block_size, - sm_multiplier) void compute_dot_product_kernel(const gko:: - batch_multi_vector:: - uniform_batch< - const ValueType> - x, - const gko:: - batch_multi_vector:: - uniform_batch< - const ValueType> - y, - const gko:: - batch_multi_vector:: - uniform_batch< - ValueType> - result) -{ - for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; - ibatch += gridDim.x) { - const auto x_b = gko::batch::batch_entry(x, ibatch); - const auto y_b = gko::batch::batch_entry(y, ibatch); - const auto r_b = gko::batch::batch_entry(result, ibatch); - compute_dot_product(x_b, y_b, r_b); + for (int rhs_index = subgroup_id; rhs_index < x.num_rhs; + rhs_index += num_subgroups_per_block) { + gen_one_dot(x, y, rhs_index, result, subgroup, conj_map); } } -template +template __global__ __launch_bounds__( default_block_size, - sm_multiplier) void compute_conj_dot_product_kernel(const gko:: - batch_multi_vector:: - uniform_batch< - const ValueType> - x, - const gko:: - batch_multi_vector:: - uniform_batch< - const ValueType> - y, - const gko:: - batch_multi_vector:: - uniform_batch< - ValueType> - result) + sm_multiplier) void compute_gen_dot_product_kernel(const gko:: + batch_multi_vector:: + uniform_batch< + const ValueType> + x, + const gko:: + batch_multi_vector:: + uniform_batch< + const ValueType> + y, + const gko:: + batch_multi_vector:: + uniform_batch< + ValueType> + result, + Mapping map) { - for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; - ibatch += gridDim.x) { - const auto x_b = gko::batch::batch_entry(x, ibatch); - const auto y_b = gko::batch::batch_entry(y, ibatch); - const auto r_b = gko::batch::batch_entry(result, ibatch); - compute_conj_dot_product(x_b, y_b, r_b); + for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; + batch_id += gridDim.x) { + const auto x_b = gko::batch::batch_entry(x, batch_id); + const auto y_b = gko::batch::batch_entry(y, batch_id); + const auto r_b = gko::batch::batch_entry(result, batch_id); + compute_gen_dot_product(x_b, y_b, r_b, map); } } -template +template __device__ __forceinline__ void one_norm2( const gko::batch_multi_vector::batch_entry& x, const int rhs_index, const gko::batch_multi_vector::batch_entry>& result, - group::thread_block_tile& subwarp_grp) + Group subgroup) { using real_type = typename gko::remove_complex; real_type val = zero(); - for (int r = subwarp_grp.thread_rank(); r < x.num_rows; - r += subwarp_grp.size()) { + for (int r = subgroup.thread_rank(); r < x.num_rows; r += subgroup.size()) { val += squared_norm(x.values[r * x.stride + rhs_index]); } - // subwarp_grp level reduction -#pragma unroll - for (int j = config::warp_size / 2; j > 0; j /= 2) { - val += subwarp_grp.shfl_down(val, j); - } + // subgroup level reduction + val = reduce(subgroup, val, thrust::plus>{}); - if (subwarp_grp.thread_rank() == 0) { + if (subgroup.thread_rank() == 0) { result.values[rhs_index] = sqrt(val); } } @@ -292,13 +210,13 @@ 
__device__ __forceinline__ void compute_norm2( { constexpr auto tile_size = config::warp_size; auto thread_block = group::this_thread_block(); - auto subwarp_grp = group::tiled_partition(thread_block); - const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); - const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + auto subgroup = group::tiled_partition(thread_block); + const auto subgroup_id = static_cast(threadIdx.x / tile_size); + const int num_subgroups_per_block = ceildiv(blockDim.x, tile_size); - for (int rhs_index = subwarp_grp_id; rhs_index < x.num_rhs; - rhs_index += num_subwarp_grps_per_block) { - one_norm2(x, rhs_index, result, subwarp_grp); + for (int rhs_index = subgroup_id; rhs_index < x.num_rhs; + rhs_index += num_subgroups_per_block) { + one_norm2(x, rhs_index, result, subgroup); } } @@ -314,10 +232,10 @@ __global__ __launch_bounds__( remove_complex> result) { - for (size_type ibatch = blockIdx.x; ibatch < x.num_batch_entries; - ibatch += gridDim.x) { - const auto x_b = gko::batch::batch_entry(x, ibatch); - const auto r_b = gko::batch::batch_entry(result, ibatch); + for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; + batch_id += gridDim.x) { + const auto x_b = gko::batch::batch_entry(x, batch_id); + const auto r_b = gko::batch::batch_entry(result, batch_id); compute_norm2(x_b, r_b); } } @@ -349,10 +267,10 @@ __global__ const gko::batch_multi_vector::uniform_batch src, const gko::batch_multi_vector::uniform_batch dst) { - for (size_type ibatch = blockIdx.x; ibatch < src.num_batch_entries; - ibatch += gridDim.x) { - const auto dst_b = gko::batch::batch_entry(dst, ibatch); - const auto src_b = gko::batch::batch_entry(src, ibatch); + for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_entries; + batch_id += gridDim.x) { + const auto dst_b = gko::batch::batch_entry(dst, batch_id); + const auto src_b = gko::batch::batch_entry(src, batch_id); copy(src_b, dst_b); } } From c72ffadb8164c72b19376a8f9cced1fd813bbdb3 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Mon, 24 Jul 2023 08:42:37 +0000 Subject: [PATCH 131/583] Format files Co-authored-by: Pratik Nayak --- .../base/batch_multi_vector_kernels.hpp.inc | 34 ++++++++----------- hip/base/batch_multi_vector_kernels.hip.cpp | 2 -- reference/base/batch_struct.hpp | 12 +++---- 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index 18c4f48811b..efbbd323ef6 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -52,9 +52,9 @@ __device__ __forceinline__ void scale( template __global__ - __launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( - const gko::batch_multi_vector::uniform_batch alpha, - const gko::batch_multi_vector::uniform_batch x, Mapping map) +__launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( + const gko::batch_multi_vector::uniform_batch alpha, + const gko::batch_multi_vector::uniform_batch x, Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -83,10 +83,10 @@ __device__ __forceinline__ void add_scaled( template __global__ - __launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( - const gko::batch_multi_vector::uniform_batch alpha, - const gko::batch_multi_vector::uniform_batch x, - const gko::batch_multi_vector::uniform_batch y, Mapping map) 
+__launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( + const gko::batch_multi_vector::uniform_batch alpha, + const gko::batch_multi_vector::uniform_batch x, + const gko::batch_multi_vector::uniform_batch y, Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -222,15 +222,11 @@ __device__ __forceinline__ void compute_norm2( template -__global__ __launch_bounds__( - default_block_size, - sm_multiplier) void compute_norm2_kernel(const gko::batch_multi_vector:: - uniform_batch - x, - const gko::batch_multi_vector:: - uniform_batch< - remove_complex> - result) +__global__ +__launch_bounds__(default_block_size, sm_multiplier) void compute_norm2_kernel( + const gko::batch_multi_vector::uniform_batch x, + const gko::batch_multi_vector::uniform_batch> + result) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -263,9 +259,9 @@ __device__ __forceinline__ void copy( template __global__ - __launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( - const gko::batch_multi_vector::uniform_batch src, - const gko::batch_multi_vector::uniform_batch dst) +__launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( + const gko::batch_multi_vector::uniform_batch src, + const gko::batch_multi_vector::uniform_batch dst) { for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_entries; batch_id += gridDim.x) { diff --git a/hip/base/batch_multi_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp index c1e7469ef9e..096c5e8a5d3 100644 --- a/hip/base/batch_multi_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -34,8 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include - - #include #include diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index cec3a4ed813..f3512968d9e 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -30,17 +30,17 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
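A minimal sketch of the mapping-functor idea behind compute_gen_dot_product_kernel above, shown as plain sequential C++ rather than a CUDA/HIP kernel; the helper name and setup are hypothetical and only illustrate how one template serves both the dot and the conjugate dot product:

#include <complex>
#include <vector>

// Accumulates the sum over i of conj_map(x[i]) * y[i]; the functor decides
// whether the left operand is conjugated.
template <typename ValueType, typename Mapping>
ValueType generic_dot(const std::vector<ValueType>& x,
                      const std::vector<ValueType>& y, Mapping conj_map)
{
    ValueType result{};
    for (std::size_t i = 0; i < x.size(); ++i) {
        result += conj_map(x[i]) * y[i];
    }
    return result;
}

// usage with ValueType = std::complex<double>:
//   auto dot      = generic_dot(x, y, [](auto v) { return v; });
//   auto conj_dot = generic_dot(x, y, [](auto v) { return std::conj(v); });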
*************************************************************/ -#ifndef GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ -#define GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ - - -#include "core/base/batch_struct.hpp" +#ifndef GKO_REFERENCE_BASE_BATCH_STRUCT_HPP_ +#define GKO_REFERENCE_BASE_BATCH_STRUCT_HPP_ #include #include +#include "core/base/batch_struct.hpp" + + namespace gko { namespace kernels { /** @@ -111,4 +111,4 @@ maybe_null_batch_struct(const BatchMultiVector* const op) } // namespace gko -#endif // GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ +#endif // GKO_REFERENCE_BASE_BATCH_STRUCT_HPP_ From b5feec142d8234f5191d36c996b1bfee1adabaed Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 24 Jul 2023 10:18:29 +0200 Subject: [PATCH 132/583] Add a fill method and test --- core/base/batch_multi_vector.cpp | 12 +++--------- core/test/base/batch_multi_vector.cpp | 19 +++++++++++++++++++ .../ginkgo/core/base/batch_multi_vector.hpp | 11 +++++++++++ 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 3578d0678de..7f6473fc5fc 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -187,19 +187,13 @@ void read_impl(MatrixType* mtx, const std::vector& data) } auto tmp = MatrixType::create(mtx->get_executor()->get_master(), batch_size); + tmp->fill(zero()); for (size_type b = 0; b < data.size(); ++b) { size_type ind = 0; for (size_type row = 0; row < data[b].size[0]; ++row) { for (size_type col = 0; col < data[b].size[1]; ++col) { - if (ind < data[b].nonzeros.size() && - data[b].nonzeros[ind].row == row && - data[b].nonzeros[ind].column == col) { - tmp->at(b, row, col) = data[b].nonzeros[ind].value; - ++ind; - } else { - tmp->at(b, row, col) = - zero(); - } + tmp->at(b, row, col) = data[b].nonzeros[ind].value; + ++ind; } } } diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 410ea70b4dd..5fbc4d5aa32 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -325,6 +325,25 @@ TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructed) } +TYPED_TEST(BatchMultiVector, CanBeFilledWithValue) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::BatchMultiVector::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(3, 1))); + + m->fill(value_type(2.0)); + + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 1)); + EXPECT_EQ(m->at(0, 0, 0), value_type{2.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 0, 2), value_type{2.0}); +} + + TYPED_TEST(BatchMultiVector, CanBeUnbatchedIntoDenseMatrices) { using value_type = typename TestFixture::value_type; diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 34ca15db8c0..e8e3d72ef09 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -402,6 +402,17 @@ class BatchMultiVector exec, sizes, gko::detail::array_const_cast(std::move(values))}); } + /** + * Fills the input BatchMultiVector with a given value + * + * @param value the value to be filled + */ + void fill(ValueType value) + { + GKO_ASSERT(this->values_.get_num_elems() > 0); + this->values_.fill(value); + } + private: inline batch_dim<2> compute_batch_size( const std::vector*>& 
matrices) From 9f0282050beec93013db612ae6f1cd7cf924e8dd Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 24 Jul 2023 14:14:45 +0200 Subject: [PATCH 133/583] Update dpcpp kernels and fix for 2022-1 Cannot use sycl::reduce_over_group for older DPCPP versions. --- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 84 +++++++++---- dpcpp/base/batch_multi_vector_kernels.hpp.inc | 111 +++++++----------- 2 files changed, 104 insertions(+), 91 deletions(-) diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 97f7469a6f6..1cd7061c161 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -48,6 +48,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "dpcpp/base/dim3.dp.hpp" #include "dpcpp/base/dpct.hpp" #include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/intrinsics.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" namespace gko { @@ -81,16 +85,31 @@ void scale(std::shared_ptr exec, const dim3 grid(num_batches); // Launch a kernel that has nbatches blocks, each block has max group size - (exec->get_queue())->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto alpha_b = batch::batch_entry(alpha_ub, group_id); - const auto x_b = batch::batch_entry(x_ub, group_id); - scale_kernel(alpha_b, x_b, item_ct1); - }); - }); + if (alpha->get_common_size()[1] == 1) { + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto alpha_b = batch::batch_entry(alpha_ub, group_id); + const auto x_b = batch::batch_entry(x_ub, group_id); + scale_kernel(alpha_b, x_b, item_ct1, + [](int col) { return 0; }); + }); + }); + } else { + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto alpha_b = batch::batch_entry(alpha_ub, group_id); + const auto x_b = batch::batch_entry(x_ub, group_id); + scale_kernel(alpha_b, x_b, item_ct1, + [](int col) { return col; }); + }); + }); + } } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -116,17 +135,33 @@ void add_scaled(std::shared_ptr exec, const auto alpha_ub = get_batch_struct(alpha); const auto x_ub = get_batch_struct(x); const auto y_ub = get_batch_struct(y); - (exec->get_queue())->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto alpha_b = batch::batch_entry(alpha_ub, group_id); - const auto x_b = batch::batch_entry(x_ub, group_id); - const auto y_b = batch::batch_entry(y_ub, group_id); - add_scaled_kernel(alpha_b, x_b, y_b, item_ct1); - }); - }); + if (alpha->get_common_size()[1] == 1) { + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto alpha_b = batch::batch_entry(alpha_ub, 
group_id); + const auto x_b = batch::batch_entry(x_ub, group_id); + const auto y_b = batch::batch_entry(y_ub, group_id); + add_scaled_kernel(alpha_b, x_b, y_b, item_ct1, + [](auto col) { return 0; }); + }); + }); + } else { + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto alpha_b = batch::batch_entry(alpha_ub, group_id); + const auto x_b = batch::batch_entry(x_ub, group_id); + const auto y_b = batch::batch_entry(y_ub, group_id); + add_scaled_kernel(alpha_b, x_b, y_b, item_ct1, + [](auto col) { return col; }); + }); + }); + } } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( @@ -159,7 +194,8 @@ void compute_dot(std::shared_ptr exec, const auto x_b = batch::batch_entry(x_ub, group_id); const auto y_b = batch::batch_entry(y_ub, group_id); const auto res_b = batch::batch_entry(res_ub, group_id); - compute_dot_product_kernel(x_b, y_b, res_b, item_ct1); + compute_gen_dot_product_kernel(x_b, y_b, res_b, item_ct1, + [](auto val) { return val; }); }); }); } @@ -194,7 +230,9 @@ void compute_conj_dot(std::shared_ptr exec, const auto x_b = batch::batch_entry(x_ub, group_id); const auto y_b = batch::batch_entry(y_ub, group_id); const auto res_b = batch::batch_entry(res_ub, group_id); - compute_conj_dot_product_kernel(x_b, y_b, res_b, item_ct1); + compute_gen_dot_product_kernel( + x_b, y_b, res_b, item_ct1, + [](auto val) { return conj(val); }); }); }); } diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index cb2ccd4ae50..6e22c5c078f 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -30,11 +30,11 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
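A minimal sketch of the column-mapping lambdas used in the scale and add_scaled launchers above, written as plain host-side C++ with hypothetical names; the map selects either the single scaling factor or one factor per column, so the inner loop itself stays branch-free:

#include <vector>

template <typename ValueType, typename Mapping>
void scale_entry(std::vector<ValueType>& values, int num_rows, int num_cols,
                 const std::vector<ValueType>& alpha, Mapping map)
{
    for (int row = 0; row < num_rows; ++row) {
        for (int col = 0; col < num_cols; ++col) {
            values[row * num_cols + col] *= alpha[map(col)];
        }
    }
}

// single alpha:         scale_entry(v, m, n, alpha, [](int) { return 0; });
// one alpha per column: scale_entry(v, m, n, alpha, [](int col) { return col; });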
*************************************************************/ -template +template __dpct_inline__ void scale_kernel( const gko::batch_multi_vector::batch_entry& alpha, const gko::batch_multi_vector::batch_entry& x, - sycl::nd_item<3>& item_ct1) + sycl::nd_item<3>& item_ct1, Mapping map) { const int max_li = x.num_rows * x.num_rhs; for (int li = item_ct1.get_local_linear_id(); li < max_li; @@ -42,23 +42,18 @@ __dpct_inline__ void scale_kernel( const int row = li / x.num_rhs; const int col = li % x.num_rhs; - if (alpha.num_rhs == 1) { - x.values[row * x.stride + col] = - alpha.values[0] * x.values[row * x.stride + col]; - } else { - x.values[row * x.stride + col] = - alpha.values[col] * x.values[row * x.stride + col]; - } + x.values[row * x.stride + col] = + alpha.values[map(col)] * x.values[row * x.stride + col]; } } -template +template __dpct_inline__ void add_scaled_kernel( const gko::batch_multi_vector::batch_entry& alpha, const gko::batch_multi_vector::batch_entry& x, const gko::batch_multi_vector::batch_entry& y, - sycl::nd_item<3>& item_ct1) + sycl::nd_item<3>& item_ct1, Mapping map) { const int max_li = x.num_rows * x.num_rhs; for (int li = item_ct1.get_local_id(2); li < max_li; @@ -66,69 +61,41 @@ __dpct_inline__ void add_scaled_kernel( const int row = li / x.num_rhs; const int col = li % x.num_rhs; - if (alpha.num_rhs == 1) { - y.values[row * y.stride + col] += - alpha.values[0] * x.values[row * x.stride + col]; - } else { - y.values[row * y.stride + col] += - alpha.values[col] * x.values[row * x.stride + col]; - } + y.values[row * y.stride + col] += + alpha.values[map(col)] * x.values[row * x.stride + col]; } } -template -__dpct_inline__ void compute_dot_product_kernel( +template +__dpct_inline__ void compute_gen_dot_product_kernel( const gko::batch_multi_vector::batch_entry& x, const gko::batch_multi_vector::batch_entry& y, const gko::batch_multi_vector::batch_entry& result, - sycl::nd_item<3>& item_ct1) + sycl::nd_item<3>& item_ct1, Mapping conj_map) { - const auto sg = item_ct1.get_sub_group(); - const int sg_id = sg.get_group_id(); - const int sg_size = sg.get_local_range().size(); - const int num_sg = sg.get_group_range().size(); - - for (int rhs_index = sg_id; rhs_index < x.num_rhs; rhs_index += num_sg) { + constexpr auto tile_size = config::warp_size; + const auto subgroup = item_ct1.get_sub_group(); + const int subgroup_id = subgroup.get_group_id(); + const int subgroup_size = subgroup.get_local_range().size(); + const int num_subgroups = subgroup.get_group_range().size(); + auto subg = + group::tiled_partition(group::this_thread_block(item_ct1)); + + for (int rhs_index = subgroup_id; rhs_index < x.num_rhs; + rhs_index += num_subgroups) { ValueType val = zero(); - for (int r = sg.get_local_id(); r < x.num_rows; r += sg_size) { - val += x.values[r * x.stride + rhs_index] * + for (int r = subgroup.get_local_id(); r < x.num_rows; + r += subgroup_size) { + val += conj_map(x.values[r * x.stride + rhs_index]) * y.values[r * y.stride + rhs_index]; } - val = sycl::reduce_over_group(sg, val, sycl::plus<>()); + val = ::gko::kernels::dpcpp::reduce( + subg, val, [](ValueType a, ValueType b) { return a + b; }); - if (sg.get_local_id() == 0) { - result.values[rhs_index] = val; - } - } -} - - -template -__dpct_inline__ void compute_conj_dot_product_kernel( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y, - const gko::batch_multi_vector::batch_entry& result, - sycl::nd_item<3>& item_ct1) -{ - const auto sg = item_ct1.get_sub_group(); - 
const int sg_id = sg.get_group_id(); - const int sg_size = sg.get_local_range().size(); - const int num_sg = sg.get_group_range().size(); - - for (int rhs_index = sg_id; rhs_index < x.num_rhs; rhs_index += num_sg) { - ValueType val = zero(); - - for (int r = sg.get_local_id(); r < x.num_rows; r += sg_size) { - val += conj(x.values[r * x.stride + rhs_index]) * - y.values[r * y.stride + rhs_index]; - } - - val = sycl::reduce_over_group(sg, val, sycl::plus<>()); - - if (sg.get_local_id() == 0) { + if (subgroup.get_local_id() == 0) { result.values[rhs_index] = val; } } @@ -142,21 +109,29 @@ __dpct_inline__ void compute_norm2_kernel( result, sycl::nd_item<3>& item_ct1) { - const auto sg = item_ct1.get_sub_group(); - const int sg_id = sg.get_group_id(); - const int sg_size = sg.get_local_range().size(); - const int num_sg = sg.get_group_range().size(); + constexpr auto tile_size = config::warp_size; + const auto subgroup = item_ct1.get_sub_group(); + const int subgroup_id = subgroup.get_group_id(); + const int subgroup_size = subgroup.get_local_range().size(); + const int num_subgroups = subgroup.get_group_range().size(); + auto subg = + group::tiled_partition(group::this_thread_block(item_ct1)); using real_type = typename gko::remove_complex; - for (int rhs_index = sg_id; rhs_index < x.num_rhs; rhs_index += num_sg) { + for (int rhs_index = subgroup_id; rhs_index < x.num_rhs; + rhs_index += num_subgroups) { real_type val = zero(); - for (int r = sg.get_local_id(); r < x.num_rows; r += sg_size) + for (int r = subgroup.get_local_id(); r < x.num_rows; + r += subgroup_size) val += squared_norm(x.values[r * x.stride + rhs_index]); - val = sycl::reduce_over_group(sg, val, sycl::plus<>()); + val = ::gko::kernels::dpcpp::reduce( + subg, val, [](real_type a, real_type b) { return a + b; }); - if (sg.get_local_id() == 0) result.values[rhs_index] = sqrt(val); + if (subgroup.get_local_id() == 0) { + result.values[rhs_index] = sqrt(val); + } } } From 02d85ae66bc3ba651971329a825dc7a79ef39379 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 25 Jul 2023 11:39:02 +0200 Subject: [PATCH 134/583] Fix dpcpp CPU subgroup_size issue Co-authored-by: Yu-Hsiang Mike Tsai --- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 13 ++++++++++--- dpcpp/base/batch_multi_vector_kernels.hpp.inc | 12 ++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 1cd7061c161..2c48970d13d 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -186,9 +186,12 @@ void compute_dot(std::shared_ptr exec, const dim3 block(group_size); const dim3 grid(num_batches); + // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto x_b = batch::batch_entry(x_ub, group_id); @@ -224,7 +227,9 @@ void compute_conj_dot(std::shared_ptr exec, (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = 
group.get_group_linear_id(); const auto x_b = batch::batch_entry(x_ub, group_id); @@ -259,7 +264,9 @@ void compute_norm2(std::shared_ptr exec, (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto x_b = batch::batch_entry(x_ub, group_id); diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index 6e22c5c078f..7dfe13d0fda 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -75,12 +75,12 @@ __dpct_inline__ void compute_gen_dot_product_kernel( sycl::nd_item<3>& item_ct1, Mapping conj_map) { constexpr auto tile_size = config::warp_size; - const auto subgroup = item_ct1.get_sub_group(); + auto subg = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto subgroup = static_cast(subg); const int subgroup_id = subgroup.get_group_id(); const int subgroup_size = subgroup.get_local_range().size(); const int num_subgroups = subgroup.get_group_range().size(); - auto subg = - group::tiled_partition(group::this_thread_block(item_ct1)); for (int rhs_index = subgroup_id; rhs_index < x.num_rhs; rhs_index += num_subgroups) { @@ -110,12 +110,12 @@ __dpct_inline__ void compute_norm2_kernel( sycl::nd_item<3>& item_ct1) { constexpr auto tile_size = config::warp_size; - const auto subgroup = item_ct1.get_sub_group(); + auto subg = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto subgroup = static_cast(subg); const int subgroup_id = subgroup.get_group_id(); const int subgroup_size = subgroup.get_local_range().size(); const int num_subgroups = subgroup.get_group_range().size(); - auto subg = - group::tiled_partition(group::this_thread_block(item_ct1)); using real_type = typename gko::remove_complex; for (int rhs_index = subgroup_id; rhs_index < x.num_rhs; From 4c2fafd78cf2bcae25d867f9e03d89876813a62a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 25 Jul 2023 14:45:28 +0200 Subject: [PATCH 135/583] Move impls to source from header --- core/base/batch_multi_vector.cpp | 123 +++++++++++++--- .../ginkgo/core/base/batch_multi_vector.hpp | 135 +++--------------- 2 files changed, 118 insertions(+), 140 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 7f6473fc5fc..b73a92467f6 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -67,8 +67,77 @@ GKO_REGISTER_OPERATION(copy, batch_multi_vector::copy); template -void BatchMultiVector::scale_impl( - const BatchMultiVector* alpha) +std::unique_ptr> +BatchMultiVector::create_with_config_of( + ptr_param other) +{ + // De-referencing `other` before calling the functions (instead of + // using operator `->`) is currently required to be compatible with + // CUDA 10.1. + // Otherwise, it results in a compile error. 
+ return (*other).create_with_same_config(); +} + + +template +std::vector>> +BatchMultiVector::unbatch() const +{ + using unbatch_type = matrix::Dense; + auto exec = this->get_executor(); + auto unbatch_mats = std::vector>{}; + for (size_type b = 0; b < this->get_num_batch_entries(); ++b) { + auto mat = unbatch_type::create(exec, this->get_common_size()); + exec->copy_from(exec.get(), mat->get_num_stored_elements(), + this->get_const_values() + + this->get_size().get_cumulative_offset(b), + mat->get_values()); + unbatch_mats.emplace_back(std::move(mat)); + } + return unbatch_mats; +} + + +template +std::unique_ptr> +BatchMultiVector::create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + gko::detail::const_array_view&& values) +{ + // cast const-ness away, but return a const object afterwards, + // so we can ensure that no modifications take place. + return std::unique_ptr(new BatchMultiVector{ + exec, sizes, gko::detail::array_const_cast(std::move(values))}); +} + + +template +void BatchMultiVector::fill(ValueType value) +{ + GKO_ASSERT(this->values_.get_num_elems() > 0); + this->values_.fill(value); +} + + +template +void BatchMultiVector::set_size(const batch_dim<2>& value) noexcept +{ + batch_size_ = value; +} + + +template +std::unique_ptr> +BatchMultiVector::create_with_same_config() const +{ + return BatchMultiVector::create(this->get_executor(), + this->get_size()); +} + + +template +void BatchMultiVector::scale( + ptr_param> alpha) { GKO_ASSERT_EQ(alpha->get_num_batch_entries(), this->get_num_batch_entries()); @@ -78,14 +147,16 @@ void BatchMultiVector::scale_impl( GKO_ASSERT_EQUAL_COLS(this->get_common_size(), alpha->get_common_size()); } - this->get_executor()->run(batch_multi_vector::make_scale(alpha, this)); + auto exec = this->get_executor(); + exec->run(batch_multi_vector::make_scale( + make_temporary_clone(exec, alpha).get(), this)); } template -void BatchMultiVector::add_scaled_impl( - const BatchMultiVector* alpha, - const BatchMultiVector* b) +void BatchMultiVector::add_scaled( + ptr_param> alpha, + ptr_param> b) { GKO_ASSERT_EQ(alpha->get_num_batch_entries(), this->get_num_batch_entries()); @@ -98,8 +169,10 @@ void BatchMultiVector::add_scaled_impl( GKO_ASSERT_EQ(b->get_num_batch_entries(), this->get_num_batch_entries()); GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); - this->get_executor()->run( - batch_multi_vector::make_add_scaled(alpha, b, this)); + auto exec = this->get_executor(); + exec->run(batch_multi_vector::make_add_scaled( + make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), this)); } @@ -111,9 +184,9 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) template -void BatchMultiVector::compute_conj_dot_impl( - const BatchMultiVector* b, - BatchMultiVector* result) const +void BatchMultiVector::compute_conj_dot( + ptr_param> b, + ptr_param> result) const { GKO_ASSERT_EQ(b->get_num_batch_entries(), this->get_num_batch_entries()); GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); @@ -122,15 +195,17 @@ void BatchMultiVector::compute_conj_dot_impl( GKO_ASSERT_EQUAL_DIMENSIONS( result->get_common_size(), get_col_sizes(this->get_size()).get_common_size()); - this->get_executor()->run( - batch_multi_vector::make_compute_conj_dot(this, b, result)); + auto exec = this->get_executor(); + exec->run(batch_multi_vector::make_compute_conj_dot( + this, make_temporary_clone(exec, b).get(), + make_temporary_output_clone(exec, result).get())); } template -void 
BatchMultiVector::compute_dot_impl( - const BatchMultiVector* b, - BatchMultiVector* result) const +void BatchMultiVector::compute_dot( + ptr_param> b, + ptr_param> result) const { GKO_ASSERT_EQ(b->get_num_batch_entries(), this->get_num_batch_entries()); GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); @@ -139,22 +214,26 @@ void BatchMultiVector::compute_dot_impl( GKO_ASSERT_EQUAL_DIMENSIONS( result->get_common_size(), get_col_sizes(this->get_size()).get_common_size()); - this->get_executor()->run( - batch_multi_vector::make_compute_dot(this, b, result)); + auto exec = this->get_executor(); + exec->run(batch_multi_vector::make_compute_dot( + this, make_temporary_clone(exec, b).get(), + make_temporary_output_clone(exec, result).get())); } template -void BatchMultiVector::compute_norm2_impl( - BatchMultiVector>* result) const +void BatchMultiVector::compute_norm2( + ptr_param>> result) const { GKO_ASSERT_EQ(this->get_num_batch_entries(), result->get_num_batch_entries()); GKO_ASSERT_EQUAL_DIMENSIONS( result->get_common_size(), get_col_sizes(this->get_size()).get_common_size()); - this->get_executor()->run(batch_multi_vector::make_compute_norm2( - as>(this), result)); + + auto exec = this->get_executor(); + exec->run(batch_multi_vector::make_compute_norm2( + this, make_temporary_output_clone(exec, result).get())); } diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index e8e3d72ef09..4ee3def0af7 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -104,14 +104,7 @@ class BatchMultiVector * @param other The other matrix whose configuration needs to copied. */ static std::unique_ptr create_with_config_of( - ptr_param other) - { - // De-referencing `other` before calling the functions (instead of - // using operator `->`) is currently required to be compatible with - // CUDA 10.1. - // Otherwise, it results in a compile error. - return (*other).create_with_same_config(); - } + ptr_param other); friend class BatchMultiVector>; @@ -133,20 +126,7 @@ class BatchMultiVector * * @return a std::vector containing the Dense matrices. */ - std::vector> unbatch() const - { - auto exec = this->get_executor(); - auto unbatch_mats = std::vector>{}; - for (size_type b = 0; b < this->get_num_batch_entries(); ++b) { - auto mat = unbatch_type::create(exec, this->get_common_size()); - exec->copy_from(exec.get(), mat->get_num_stored_elements(), - this->get_const_values() + - this->get_size().get_cumulative_offset(b), - mat->get_values()); - unbatch_mats.emplace_back(std::move(mat)); - } - return unbatch_mats; - } + std::vector> unbatch() const; /** * Returns the batch size. @@ -292,11 +272,7 @@ class BatchMultiVector * of alpha (the number of columns of alpha has to match the number of * columns of the matrix). */ - void scale(ptr_param> alpha) - { - auto exec = this->get_executor(); - this->scale_impl(make_temporary_clone(exec, alpha).get()); - } + void scale(ptr_param> alpha); /** * Adds `b` scaled by `alpha` to the vector (aka: BLAS axpy). @@ -309,28 +285,7 @@ class BatchMultiVector * @param b a matrix of the same dimension as this */ void add_scaled(ptr_param> alpha, - ptr_param> b) - { - auto exec = this->get_executor(); - this->add_scaled_impl(make_temporary_clone(exec, alpha).get(), - make_temporary_clone(exec, b).get()); - } - - /** - * Adds `a` scaled by `alpha` to the vector scaled by `beta`: - * this <- alpha * a + beta * this. 
- * - * @param alpha If alpha is 1x1 BatchMultiVector matrix, the entire matrix - * a is scaled by alpha. If it is a BatchMultiVector row vector of values, - * then i-th column of a is scaled with the i-th element of alpha (the - * number of columns of alpha has to match the number of columns of a). - * @param a a matrix of the same dimension as this. - * @param beta Scalar(s), of the same size as alpha, to multiply this - * matrix. - */ - void add_scale(ptr_param> alpha, - ptr_param> a, - ptr_param> beta); + ptr_param> b); /** * Computes the column-wise dot product of each matrix in this batch and its @@ -342,12 +297,7 @@ class BatchMultiVector * columns of this) */ void compute_dot(ptr_param> b, - ptr_param> result) const - { - auto exec = this->get_executor(); - this->compute_dot_impl(make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, result).get()); - } + ptr_param> result) const; /** * Computes the column-wise conjugate dot product of each matrix in this @@ -360,12 +310,7 @@ class BatchMultiVector * columns of this) */ void compute_conj_dot(ptr_param> b, - ptr_param> result) const - { - auto exec = this->get_executor(); - this->compute_conj_dot_impl(make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, result).get()); - } + ptr_param> result) const; /** * Computes the Euclidean (L^2) norm of each matrix in this batch. @@ -375,11 +320,7 @@ class BatchMultiVector * of columns of this) */ void compute_norm2( - ptr_param>> result) const - { - auto exec = this->get_executor(); - this->compute_norm2_impl(make_temporary_clone(exec, result).get()); - } + ptr_param>> result) const; /** * Creates a constant (immutable) batch dense matrix from a constant array. @@ -394,24 +335,14 @@ class BatchMultiVector */ static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - gko::detail::const_array_view&& values) - { - // cast const-ness away, but return a const object afterwards, - // so we can ensure that no modifications take place. - return std::unique_ptr(new BatchMultiVector{ - exec, sizes, gko::detail::array_const_cast(std::move(values))}); - } + gko::detail::const_array_view&& values); /** * Fills the input BatchMultiVector with a given value * * @param value the value to be filled */ - void fill(ValueType value) - { - GKO_ASSERT(this->values_.get_num_elems() > 0); - this->values_.fill(value); - } + void fill(ValueType value); private: inline batch_dim<2> compute_batch_size( @@ -429,13 +360,14 @@ class BatchMultiVector return size.get_cumulative_offset(size.get_num_batch_entries()); } + protected: /** * Sets the size of the BatchMultiVector. * * @param value the new size of the operator */ - void set_size(const batch_dim<2>& value) noexcept { batch_size_ = value; } + void set_size(const batch_dim<2>& value) noexcept; /** * Creates an uninitialized BatchMultiVector matrix of the specified size. 
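A minimal usage sketch of the interface declared above, assuming a reference executor and double values; sizes and the resulting numbers are illustrative only and this snippet is not part of the patch:

auto exec = gko::ReferenceExecutor::create();
// two batch entries, each a 3 x 1 multi-vector
auto x = gko::BatchMultiVector<double>::create(
    exec, gko::batch_dim<2>(2, gko::dim<2>{3, 1}));
auto y = gko::BatchMultiVector<double>::create_with_config_of(x);
x->fill(1.0);
y->fill(2.0);
// one dot-product value per batch entry and right-hand side
auto res = gko::BatchMultiVector<double>::create(
    exec, gko::batch_dim<2>(2, gko::dim<2>{1, 1}));
x->compute_conj_dot(y, res);  // every entry of res becomes 6.0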
@@ -445,7 +377,7 @@ class BatchMultiVector */ BatchMultiVector(std::shared_ptr exec, const batch_dim<2>& size = batch_dim<2>{}) - : EnablePolymorphicObject(exec), + : EnablePolymorphicObject>(exec), batch_size_(size), values_(exec, compute_num_elems(size)) {} @@ -467,7 +399,7 @@ class BatchMultiVector template BatchMultiVector(std::shared_ptr exec, const batch_dim<2>& size, ValuesArray&& values) - : EnablePolymorphicObject(exec), + : EnablePolymorphicObject>(exec), batch_size_(size), values_{exec, std::forward(values)} { @@ -484,7 +416,7 @@ class BatchMultiVector */ BatchMultiVector(std::shared_ptr exec, const std::vector*>& matrices) - : EnablePolymorphicObject(exec), + : EnablePolymorphicObject>(exec), batch_size_{compute_batch_size(matrices)}, values_(exec, compute_num_elems(batch_size_)) { @@ -507,7 +439,7 @@ class BatchMultiVector BatchMultiVector(std::shared_ptr exec, size_type num_duplications, const BatchMultiVector* input) - : BatchMultiVector( + : BatchMultiVector( exec, gko::batch_dim<2>( input->get_num_batch_entries() * num_duplications, input->get_common_size())) @@ -531,7 +463,7 @@ class BatchMultiVector BatchMultiVector(std::shared_ptr exec, size_type num_duplications, const matrix::Dense* input) - : BatchMultiVector( + : BatchMultiVector( exec, gko::batch_dim<2>(num_duplications, input->get_size())) { size_type offset = 0; @@ -550,40 +482,7 @@ class BatchMultiVector * @returns a BatchMultiVector matrix with the same configuration as the * caller. */ - std::unique_ptr create_with_same_config() const - { - return BatchMultiVector::create(this->get_executor(), this->get_size()); - } - - /** - * @copydoc scale(const BatchMultiVector *) - */ - void scale_impl(const BatchMultiVector* alpha); - - /** - * @copydoc add_scaled(const BatchMultiVector *, const BatchMultiVector *) - */ - void add_scaled_impl(const BatchMultiVector* alpha, - const BatchMultiVector* b); - - /** - * @copydoc compute_dot(const BatchMultiVector *, BatchMultiVector *) const - */ - void compute_dot_impl(const BatchMultiVector* b, - BatchMultiVector* result) const; - - /** - * @copydoc compute_conj_dot(const BatchMultiVector *, BatchMultiVector *) - * const - */ - void compute_conj_dot_impl(const BatchMultiVector* b, - BatchMultiVector* result) const; - - /** - * @copydoc compute_norm2(BatchMultiVector *) const - */ - void compute_norm2_impl( - BatchMultiVector>* result) const; + std::unique_ptr create_with_same_config() const; size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept From 5556cc0a6e7d7af4b1e80161f0fd253e8aa1e372 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 26 Jul 2023 13:58:20 +0200 Subject: [PATCH 136/583] Update docs and zero-size issues --- .../ginkgo/core/base/batch_multi_vector.hpp | 223 +++++++++++------- 1 file changed, 132 insertions(+), 91 deletions(-) diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 4ee3def0af7..2096f30b85b 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -54,15 +54,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { /** - * BatchMultiVector is a batch matrix format which explicitly stores all values - * of the vector in each of the batches. + * BatchMultiVector stores multiple vectors in a batched fashion and is useful + * for batched operations. 
For example, if you want to store two batch entries + * with multi-vectors of size (3 x 2) given below: * - * The values in each of the batches are stored in row-major format (values - * belonging to the same row appear consecutive in the memory). + * [1 2 ; 3 4 + * 1 2 ; 3 4 + * 1 2 ; 3 4] * - * @tparam ValueType precision of matrix elements + * In memory, they would be stored as a single array: + * [1 2 1 2 1 2 3 4 3 4 3 4]. + * + * Access functions @at can help access individual + * entries if necessary. + * + * The values of the batches are stored consecutively and in each batch, the + * vectors are stored in a row-major fashion. + * + * @tparam ValueType precision of multi-vector elements * * @ingroup batch_multi_vector + * @ingroup batched */ template class BatchMultiVector @@ -98,10 +110,10 @@ class BatchMultiVector using row_major_range = gko::range>; /** - * Creates a BatchMultiVector matrix with the configuration of another - * BatchMultiVector matrix. + * Creates a BatchMultiVector with the configuration of another + * BatchMultiVector. * - * @param other The other matrix whose configuration needs to copied. + * @param other The other multi-vector whose configuration needs to copied. */ static std::unique_ptr create_with_config_of( ptr_param other); @@ -122,9 +134,15 @@ class BatchMultiVector void write(std::vector& data) const override; /** - * Unbatches the batched dense and creates a std::vector of Dense matrices + * Unbatches the batched multi-vector and creates a std::vector of Dense + * matrices * - * @return a std::vector containing the Dense matrices. + * @note This is an expensive operation as new memory needs to be allocated + * and the data from the batched multi-vector needs to copied to the + * individual matrices. This is mainly intended as a utility function + * for debugging and testing purposes. + * + * @return a std::vector containing the matrix::Dense objects. */ std::vector> unbatch() const; @@ -153,24 +171,13 @@ class BatchMultiVector dim<2> get_common_size() const { return batch_size_.get_common_size(); } /** - * Returns a pointer to the array of values of the vector. + * Returns a pointer to the array of values of the beginning of the batched + * multi-vector. * * @return the pointer to the array of values */ value_type* get_values() noexcept { return values_.get_data(); } - /** - * Returns a pointer to the array of values of the vector. - * - * @return the pointer to the array of values - */ - value_type* get_values(size_type batch) noexcept - { - GKO_ASSERT(batch < this->get_num_batch_entries()); - return values_.get_data() + - this->get_size().get_cumulative_offset(batch); - } - /** * @copydoc get_values() * @@ -183,6 +190,21 @@ class BatchMultiVector return values_.get_const_data(); } + /** + * Returns a pointer to the array of values of the multi-vector for a + * specific batch entry. + * + * @param batch_id the id of the batch entry. + * + * @return the pointer to the array of values + */ + value_type* get_values(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_entries()); + return values_.get_data() + + this->get_size().get_cumulative_offset(batch_id); + } + /** * @copydoc get_values(size_type) * @@ -190,11 +212,11 @@ class BatchMultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. 
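// --- Editor's illustrative sketch (not part of the patch) ---
// A minimal sketch of the storage layout documented above: two (3 x 2) batch
// entries stored contiguously and row-major, read back through at() and the
// per-entry value pointers. Executor and values are placeholders.
#include <ginkgo/ginkgo.hpp>

void layout_sketch()
{
    using vec = gko::BatchMultiVector<double>;
    auto exec = gko::ReferenceExecutor::create();
    auto x = gko::batch_initialize<vec>(
        {{{1.0, 2.0}, {1.0, 2.0}, {1.0, 2.0}},
         {{3.0, 4.0}, {3.0, 4.0}, {3.0, 4.0}}},
        exec);
    // x->get_num_batch_entries() == 2, x->get_common_size() == gko::dim<2>{3, 2}
    auto v = x->at(1, 0, 1);  // 4.0 (batch entry 1, row 0, column 1)
    // first value of batch entry 1, i.e. offset 6 in the flat value array
    const auto* entry1 = x->get_const_values(1);
}
// --- end of editor's sketch ---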
*/ - const value_type* get_const_values(size_type batch) const noexcept + const value_type* get_const_values(size_type batch_id) const noexcept { - GKO_ASSERT(batch < this->get_num_batch_entries()); + GKO_ASSERT(batch_id < this->get_num_batch_entries()); return values_.get_const_data() + - this->get_size().get_cumulative_offset(batch); + this->get_size().get_cumulative_offset(batch_id); } /** @@ -217,8 +239,8 @@ class BatchMultiVector * @param col the column of the requested element * * @note the method has to be called on the same Executor the vector is - * stored at (e.g. trying to call this method on a GPU matrix from - * the OMP results in a runtime error) + * stored at (e.g. trying to call this method on a GPU multi-vector + * from the OMP results in a runtime error) */ value_type& at(size_type batch, size_type row, size_type col) { @@ -244,11 +266,10 @@ class BatchMultiVector * * @param batch the batch index to be queried * @param idx a linear index of the requested element - * (ignoring the stride) * * @note the method has to be called on the same Executor the vector is - * stored at (e.g. trying to call this method on a GPU matrix from - * the OMP results in a runtime error) + * stored at (e.g. trying to call this method on a GPU multi-vector + * from the OMP results in a runtime error) */ ValueType& at(size_type batch, size_type idx) noexcept { @@ -266,56 +287,59 @@ class BatchMultiVector /** * Scales the vector with a scalar (aka: BLAS scal). * - * @param alpha If alpha is 1x1 BatchMultiVector matrix, the entire matrix - * (all batches) is scaled by alpha. If it is a BatchMultiVector row vector - * of values, then i-th column of the vector is scaled with the i-th element - * of alpha (the number of columns of alpha has to match the number of - * columns of the matrix). + * @param alpha the scalar + * + * @note If alpha is 1x1 BatchMultiVector matrix, the entire multi-vector + * (all batches) is scaled by alpha. If it is a BatchMultiVector row + * vector of values, then i-th column of the vector is scaled with the + * i-th element of alpha (the number of columns of alpha has to match + * the number of columns of the multi-vector). */ void scale(ptr_param> alpha); /** * Adds `b` scaled by `alpha` to the vector (aka: BLAS axpy). * - * @param alpha If alpha is 1x1 BatchMultiVector matrix, the entire matrix - * is scaled by alpha. If it is a BatchMultiVector row vector of values, - * then i-th column of the vector is scaled with the i-th element of alpha - * (the number of columns of alpha has to match the number of columns of the - * vector). - * @param b a matrix of the same dimension as this + * @param alpha the scalar + * @param b a multi-vector of the same dimension as this + * + * @note If alpha is 1x1 BatchMultiVector matrix, the entire multi-vector + * (all batches) is scaled by alpha. If it is a BatchMultiVector row + * vector of values, then i-th column of the vector is scaled with the + * i-th element of alpha (the number of columns of alpha has to match + * the number of columns of the multi-vector). */ void add_scaled(ptr_param> alpha, ptr_param> b); /** - * Computes the column-wise dot product of each matrix in this batch and its - * corresponding entry in `b`. + * Computes the column-wise dot product of each multi-vector in this batch + * and its corresponding entry in `b`. 
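// --- Editor's illustrative sketch (not part of the patch) ---
// The two accepted shapes for alpha described above: a 1x1 scalar per batch
// entry scales the whole entry, while a 1 x num_cols row vector scales
// column-wise. Values and executor choice are placeholders.
#include <ginkgo/ginkgo.hpp>

void scaling_sketch()
{
    using vec = gko::BatchMultiVector<double>;
    auto exec = gko::ReferenceExecutor::create();
    auto x = gko::batch_initialize<vec>(
        {{{1.0, 2.0, 3.0}}, {{4.0, 5.0, 6.0}}}, exec);  // 2 entries, 1 x 3
    // one scalar per batch entry
    auto alpha_scalar = gko::batch_initialize<vec>({{2.0}, {-2.0}}, exec);
    x->scale(alpha_scalar.get());
    // one value per column of each batch entry
    auto alpha_cols = gko::batch_initialize<vec>(
        {{{2.0, -2.0, 1.5}}, {{3.0, -1.0, 0.25}}}, exec);
    x->scale(alpha_cols.get());
    // axpy: x <- x + alpha * b
    auto b = gko::batch_initialize<vec>(
        {{{1.0, 1.0, 1.0}}, {{1.0, 1.0, 1.0}}}, exec);
    x->add_scaled(alpha_scalar.get(), b.get());
}
// --- end of editor's sketch ---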
* - * @param b a BatchMultiVector matrix of same dimension as this + * @param b a BatchMultiVector of same dimension as this * @param result a BatchMultiVector row vector, used to store the dot - * product (the number of column in the vector must match the number of - * columns of this) + * product */ void compute_dot(ptr_param> b, ptr_param> result) const; /** - * Computes the column-wise conjugate dot product of each matrix in this - * batch and its corresponding entry in `b`. If the vector has complex + * Computes the column-wise conjugate dot product of each multi-vector in + * this batch and its corresponding entry in `b`. If the vector has complex * value_type, then the conjugate of this is taken. * - * @param b a BatchMultiVector matrix of same dimension as this + * @param b a BatchMultiVector of same dimension as this * @param result a BatchMultiVector row vector, used to store the dot - * product (the number of column in the vector must match the number of - * columns of this) + * product (the number of column in the vector must match the + * number of columns of this) */ void compute_conj_dot(ptr_param> b, ptr_param> result) const; /** - * Computes the Euclidean (L^2) norm of each matrix in this batch. + * Computes the Euclidean (L^2) norm of each multi-vector in this batch. * - * @param result a BatchMultiVector row vector, used to store the norm + * @param result a BatchMultiVector, used to store the norm * (the number of columns in the vector must match the number * of columns of this) */ @@ -323,15 +347,17 @@ class BatchMultiVector ptr_param>> result) const; /** - * Creates a constant (immutable) batch dense matrix from a constant array. + * Creates a constant (immutable) batch multi-vector from a constant + * array. * * @param exec the executor to create the vector on * @param size the dimensions of the vector * @param values the value array of the vector * @param stride the row-stride of the vector - * @returns A smart pointer to the constant matrix wrapping the input array - * (if it resides on the same executor as the vector) or a copy of - * the array on the correct executor. + * + * @return A smart pointer to the constant multi-vector wrapping the input + * array (if it resides on the same executor as the vector) or a copy of the + * array on the correct executor. */ static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, @@ -370,7 +396,8 @@ class BatchMultiVector void set_size(const batch_dim<2>& value) noexcept; /** - * Creates an uninitialized BatchMultiVector matrix of the specified size. + * Creates an uninitialized multi-vector of the specified + * size. * * @param exec Executor associated to the vector * @param size size of the vector @@ -383,14 +410,14 @@ class BatchMultiVector {} /** - * Creates a BatchMultiVector matrix from an already allocated (and + * Creates a BatchMultiVector from an already allocated (and * initialized) array. 
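// --- Editor's illustrative sketch (not part of the patch) ---
// Wrapping an already allocated buffer, as described above. The array view
// helpers (gko::array<T>::view / const_view) follow the pattern used in the
// accompanying unit tests and are an assumption here; buffer contents are
// placeholders.
#include <ginkgo/ginkgo.hpp>

void view_sketch()
{
    auto exec = gko::ReferenceExecutor::create();
    // 3 batch entries, each 2 x 2, stored contiguously and row-major
    double data[] = {1.0, 2.0, -1.0, 3.0, 4.0, -1.0,
                     3.0, 5.0, 1.0,  5.0, 6.0, -3.0};
    auto size = gko::batch_dim<2>(3, gko::dim<2>{2, 2});
    // wraps the buffer without copying, since the view matches the executor
    auto m = gko::BatchMultiVector<double>::create(
        exec, size, gko::array<double>::view(exec, 12, data));
    // immutable variant for read-only buffers
    auto cm = gko::BatchMultiVector<double>::create_const(
        exec, size, gko::array<double>::const_view(exec, 12, data));
}
// --- end of editor's sketch ---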
* * @tparam ValuesArray type of array of values * * @param exec Executor associated to the vector * @param size sizes of the batch matrices in a batch_dim object - * @param values array of matrix values + * @param values array of values * * @note If `values` is not an rvalue, not an array of ValueType, or is on * the wrong executor, an internal copy will be created, and the @@ -409,10 +436,16 @@ class BatchMultiVector } /** - * Creates a BatchMultiVector matrix from a vector of matrices + * Creates a BatchMultiVector from a vector of matrices * * @param exec Executor associated to the vector - * @param matrices The matrices that need to be batched. + * @param matrices The matrix::Dense objects that need to be batched. + * + * @note This is a utility function that can serve as a first step to port + * to batched data-structures and solvers. Even if the matrices are in + * device memory, this method can have siginificant overhead, as new + * allocations and deep copies are necessary and hence this constructor must + * not be used in performance sensitive applications */ BatchMultiVector(std::shared_ptr exec, const std::vector*>& matrices) @@ -430,11 +463,17 @@ class BatchMultiVector } /** - * Creates a BatchMultiVector matrix by duplicating BatchMultiVector matrix + * Creates a BatchMultiVector matrix by duplicating BatchMultiVector object * * @param exec Executor associated to the vector * @param num_duplications The number of times to duplicate * @param input the vector to be duplicated. + * + * @note This is a utility function that can serve as a first step to port + * to batched data-structures and solvers. Even if the matrices are in + * device memory, this method can have siginificant overhead, as new + * allocations and deep copies are necessary and hence this constructor must + * not be used in performance sensitive applications. */ BatchMultiVector(std::shared_ptr exec, size_type num_duplications, @@ -454,11 +493,11 @@ class BatchMultiVector } /** - * Creates a BatchMultiVector matrix by duplicating Dense matrix + * Creates a BatchMultiVector matrix by a duplicating a matrix::Dense object * * @param exec Executor associated to the vector * @param num_duplications The number of times to duplicate - * @param input the vector to be duplicated. + * @param input the matrix to be duplicated. */ BatchMultiVector(std::shared_ptr exec, size_type num_duplications, @@ -476,10 +515,10 @@ class BatchMultiVector } /** - * Creates a BatchMultiVector matrix with the same configuration as the - * callers matrix. + * Creates a BatchMultiVector with the same configuration as the + * callers object. * - * @returns a BatchMultiVector matrix with the same configuration as the + * @returns a BatchMultiVector with the same configuration as the * caller. */ std::unique_ptr create_with_same_config() const; @@ -504,13 +543,14 @@ class BatchMultiVector /** - * Creates and initializes a batch of column-vectors. + * Creates and initializes a batch of single column-vectors. * - * This function first creates a temporary Dense matrix, fills it with passed in - * values, and then converts the vector to the requested type. + * This function first creates a temporary BatchMultiVector, fills it with + * passed in values, and then converts the vector to the requested type. 
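// --- Editor's illustrative sketch (not part of the patch) ---
// Assembling a BatchMultiVector from existing matrix::Dense objects and
// duplicating it, mirroring the unit tests touched by this patch. As the notes
// above point out, these constructors deep-copy and are intended for setup and
// testing rather than performance-critical code. Values are placeholders.
#include <ginkgo/ginkgo.hpp>

#include <vector>

void assembly_sketch()
{
    using dense = gko::matrix::Dense<double>;
    auto exec = gko::ReferenceExecutor::create();
    auto mat1 = gko::initialize<dense>({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, exec);
    auto mat2 = gko::initialize<dense>({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec);
    // batch the two dense matrices (deep copy)
    auto batch = gko::BatchMultiVector<double>::create(
        exec, std::vector<dense*>{mat1.get(), mat2.get()});
    // duplicate the whole batch three times -> 6 batch entries
    auto dup = gko::BatchMultiVector<double>::create(exec, 3, batch.get());
    // split back into individual Dense matrices (copies; mainly for testing)
    auto unbatched = dup->unbatch();
}
// --- end of editor's sketch ---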
* * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo interface) + * (BatchMultiVector has to implement the ConvertibleTo + * interface) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * @@ -556,10 +596,10 @@ std::unique_ptr batch_initialize( /** - * Creates and initializes a batch of matrices. + * Creates and initializes a batch of multi-vectors. * - * This function first creates a temporary Dense matrix, fills it with passed in - * values, and then converts the vector to the requested type. + * This function first creates a temporary BatchMultiVector, fills it with + * passed in values, and then converts the vector to the requested type. * * @tparam Matrix matrix type to initialize * (Dense has to implement the ConvertibleTo interface) @@ -586,8 +626,9 @@ std::unique_ptr batch_initialize( size_type num_batch_entries = vals.size(); GKO_ASSERT(num_batch_entries > 0); auto vals_begin = begin(vals); - size_type common_num_rows = vals_begin->size(); - size_type common_num_cols = vals_begin->begin()->size(); + size_type common_num_rows = vals_begin ? vals_begin->size() : 0; + size_type common_num_cols = + vals_begin->begin() ? vals_begin->begin()->size() : 0; auto common_size = dim<2>(common_num_rows, common_num_cols); for (const auto& b : vals) { auto num_rows = b.size(); @@ -618,20 +659,19 @@ std::unique_ptr batch_initialize( /** - * Creates and initializes a batch column-vector by making copies of the single - * input column vector. + * Creates and initializes a batch single column-vector by making copies of the + * single input column vector. * - * This function first creates a temporary batch dense matrix, fills it with + * This function first creates a temporary batch multi-vector, fills it with * passed in values, and then converts the vector to the requested type. * * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo + * (BatchMultiVector has to implement the ConvertibleTo * interface) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param num_vectors The number of times the input vector is copied into - * the final output + * @param num_vectors The number of times the input vector is to be duplicated * @param vals values used to initialize each vector in the temp. batch * @param exec Executor associated to the vector * @param create_args additional arguments passed to Matrix::create, not @@ -650,7 +690,8 @@ std::unique_ptr batch_initialize( using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = num_vectors; GKO_ASSERT(num_batch_entries > 0); - auto b_size = batch_dim<2>(num_batch_entries, dim<2>(vals.size(), 1)); + auto b_size = + batch_dim<2>(num_batch_entries, dim<2>(vals ? vals.size() : 0, 1)); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); for (size_type batch = 0; batch < num_vectors; batch++) { size_type idx = 0; @@ -668,17 +709,16 @@ std::unique_ptr batch_initialize( /** * Creates and initializes a matrix from copies of a given matrix. * - * This function first creates a temporary batch dense matrix, fills it with + * This function first creates a temporary batch multi-vector, fills it with * passed in values, and then converts the vector to the requested type. 
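// --- Editor's illustrative sketch (not part of the patch) ---
// The duplication overloads of batch_initialize (a single column vector above,
// a single matrix in the overload that follows) replicate one input across all
// batch entries; the zero-size guards keep empty inputs from being
// dereferenced. Counts and values are placeholders.
#include <ginkgo/ginkgo.hpp>

void duplicated_init_sketch()
{
    using vec = gko::BatchMultiVector<double>;
    auto exec = gko::ReferenceExecutor::create();
    // 4 batch entries, each the column vector [1; 2; 3]
    auto x = gko::batch_initialize<vec>(4, {1.0, 2.0, 3.0}, exec);
    // 4 batch entries, each the 2 x 2 matrix [1 2; 3 4]
    auto y = gko::batch_initialize<vec>(4, {{1.0, 2.0}, {3.0, 4.0}}, exec);
}
// --- end of editor's sketch ---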
* * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo interface) + * (BatchMultiVector has to implement the ConvertibleTo + * interface) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param stride row strides for the temporary batch dense matrix - * @param num_matrices The number of times the input matrix is copied into - * the final output + * @param num_batch_entries The number of times the input matrix is duplicated * @param vals values used to initialize each vector in the temp. batch * @param exec Executor associated to the vector * @param create_args additional arguments passed to Matrix::create, not @@ -697,7 +737,8 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; GKO_ASSERT(num_batch_entries > 0); - auto common_size = dim<2>(vals.size(), begin(vals)->size()); + auto common_size = + dim<2>(vals ? vals.size() : 0, vals ? begin(vals)->size() : 0); batch_dim<2> b_size(num_batch_entries, common_size); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); for (size_type batch = 0; batch < num_batch_entries; batch++) { From 5f55ccdf71bd900c40f797f449701d1dea4bccf6 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 26 Jul 2023 14:50:12 +0200 Subject: [PATCH 137/583] Review and doc updates --- core/base/batch_multi_vector.cpp | 3 ++- cuda/base/batch_struct.hpp | 23 ++-------------- dpcpp/base/batch_struct.hpp | 23 ++-------------- hip/base/batch_struct.hip.hpp | 23 ++-------------- .../ginkgo/core/base/batch_multi_vector.hpp | 20 +++++++------- .../ginkgo/core/base/exception_helpers.hpp | 27 +++++++++++++++++++ reference/base/batch_struct.hpp | 23 ++-------------- 7 files changed, 48 insertions(+), 94 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index b73a92467f6..3784c6645d7 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -257,7 +257,8 @@ void BatchMultiVector::move_to( template void read_impl(MatrixType* mtx, const std::vector& data) { - GKO_ASSERT(data.size() > 0); + GKO_THROW_IF_INVALID(data.size() > 0, "Input data is empty"); + auto common_size = data[0].size; auto batch_size = batch_dim<2>(data.size(), common_size); for (const auto& b : data) { diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 4358d688f07..f9a50376362 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -59,7 +59,7 @@ namespace cuda { /** - * Generates an immutable uniform batch struct from a batch of dense matrices. + * Generates an immutable uniform batch struct from a batch of multi-vectors. */ template inline gko::batch_multi_vector::uniform_batch> @@ -72,7 +72,7 @@ get_batch_struct(const BatchMultiVector* const op) } /** - * Generates a uniform batch struct from a batch of dense matrices. + * Generates a uniform batch struct from a batch of multi-vectors. */ template inline gko::batch_multi_vector::uniform_batch> @@ -85,25 +85,6 @@ get_batch_struct(BatchMultiVector* const op) } -/** - * Generates an immutable uniform batch struct from a batch of dense matrices - * that may be null. 
- */ -template -inline gko::batch_multi_vector::uniform_batch> -maybe_null_batch_struct(const BatchMultiVector* const op) -{ - if (op) { - return {as_cuda_type(op->get_const_values()), - op->get_num_batch_entries(), op->get_common_size()[1], - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; - } else { - return {nullptr, 0, 0, 0, 0}; - } -} - - } // namespace cuda } // namespace kernels } // namespace gko diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 5b88e992665..1a83fad020c 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -58,7 +58,7 @@ namespace dpcpp { /** - * Generates an immutable uniform batch struct from a batch of dense matrices. + * Generates an immutable uniform batch struct from a batch of multi-vectors. */ template inline gko::batch_multi_vector::uniform_batch get_batch_struct( @@ -72,7 +72,7 @@ inline gko::batch_multi_vector::uniform_batch get_batch_struct( /** - * Generates a uniform batch struct from a batch of dense matrices. + * Generates a uniform batch struct from a batch of multi-vectors. */ template inline gko::batch_multi_vector::uniform_batch get_batch_struct( @@ -85,25 +85,6 @@ inline gko::batch_multi_vector::uniform_batch get_batch_struct( } -/** - * Generates an immutable uniform batch struct from a batch of dense matrices - * that may be null. - */ -template -inline gko::batch_multi_vector::uniform_batch -maybe_null_batch_struct(const BatchMultiVector* const op) -{ - if (op) { - return {op->get_const_values(), op->get_num_batch_entries(), - op->get_common_size()[1], - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; - } else { - return {nullptr, 0, 0, 0, 0}; - } -} - - } // namespace dpcpp } // namespace kernels } // namespace gko diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index f8788b9e6a8..bff659838bd 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -59,7 +59,7 @@ namespace hip { /** - * Generates an immutable uniform batch struct from a batch of dense matrices. + * Generates an immutable uniform batch struct from a batch of multi-vectors. */ template inline gko::batch_multi_vector::uniform_batch> @@ -72,7 +72,7 @@ get_batch_struct(const BatchMultiVector* const op) } /** - * Generates a uniform batch struct from a batch of dense matrices. + * Generates a uniform batch struct from a batch of multi-vectors. */ template inline gko::batch_multi_vector::uniform_batch> @@ -85,25 +85,6 @@ get_batch_struct(BatchMultiVector* const op) } -/** - * Generates an immutable uniform batch struct from a batch of dense matrices - * that may be null. 
- */ -template -inline gko::batch_multi_vector::uniform_batch> -maybe_null_batch_struct(const BatchMultiVector* const op) -{ - if (op) { - return {as_hip_type(op->get_const_values()), - op->get_num_batch_entries(), op->get_common_size()[1], - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; - } else { - return {nullptr, 0, 0, 0, 0}; - } -} - - } // namespace hip } // namespace kernels } // namespace gko diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 2096f30b85b..ac4a2feb419 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -571,9 +571,9 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = vals.size(); - GKO_ASSERT(num_batch_entries > 0); + GKO_THROW_IF_INVALID(num_batch_entries > 0, "Input data is empty"); auto vals_begin = begin(vals); - size_type common_num_rows = vals_begin->size(); + size_type common_num_rows = vals_begin ? vals_begin->size() : 0; auto common_size = dim<2>(common_num_rows, 1); for (auto& val : vals) { GKO_ASSERT_EQ(common_num_rows, val.size()); @@ -624,7 +624,7 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = vals.size(); - GKO_ASSERT(num_batch_entries > 0); + GKO_THROW_IF_INVALID(num_batch_entries > 0, "Input data is empty"); auto vals_begin = begin(vals); size_type common_num_rows = vals_begin ? vals_begin->size() : 0; size_type common_num_cols = @@ -689,9 +689,10 @@ std::unique_ptr batch_initialize( { using batch_multi_vector = BatchMultiVector; size_type num_batch_entries = num_vectors; - GKO_ASSERT(num_batch_entries > 0); - auto b_size = - batch_dim<2>(num_batch_entries, dim<2>(vals ? vals.size() : 0, 1)); + GKO_THROW_IF_INVALID(num_batch_entries > 0 && vals.size() > 0, + "Input data is empty"); + auto b_size = batch_dim<2>(num_batch_entries, + dim<2>(begin(vals) ? vals.size() : 0, 1)); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); for (size_type batch = 0; batch < num_vectors; batch++) { size_type idx = 0; @@ -736,9 +737,10 @@ std::unique_ptr batch_initialize( std::shared_ptr exec, TArgs&&... create_args) { using batch_multi_vector = BatchMultiVector; - GKO_ASSERT(num_batch_entries > 0); - auto common_size = - dim<2>(vals ? vals.size() : 0, vals ? begin(vals)->size() : 0); + GKO_THROW_IF_INVALID(num_batch_entries > 0 && vals.size() > 0, + "Input data is empty"); + auto common_size = dim<2>(begin(vals) ? vals.size() : 0, + begin(vals) ? begin(vals)->size() : 0); batch_dim<2> b_size(num_batch_entries, common_size); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); for (size_type batch = 0; batch < num_batch_entries; batch++) { diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index 50ff0354105..a9a93f15fe8 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -706,6 +706,13 @@ inline T ensure_allocated_impl(T ptr, const std::string& file, int line, "semi-colon warnings") +/** + * Throws an InvalidStateError with a user-specified message + * + * @param _message message to be displayed. + * + * @throw InvalidStateError. 
+ */ #define GKO_INVALID_STATE(_message) \ { \ throw ::gko::InvalidStateError(__FILE__, __LINE__, __func__, \ @@ -716,6 +723,26 @@ inline T ensure_allocated_impl(T ptr, const std::string& file, int line, "semi-colon warnings") +/** + * Throws an InvalidStateError if condition is not satisfied + * + * @param _condition the condition to check. + * @param _message message to be displayed. + * + * @throw InvalidStateError. + */ +#define GKO_THROW_IF_INVALID(_condition, _message) \ + { \ + if (!_condition) { \ + throw ::gko::InvalidStateError(__FILE__, __LINE__, __func__, \ + _message); \ + } \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + } // namespace gko diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index f3512968d9e..ed1350dc366 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -60,7 +60,7 @@ namespace host { /** - * Generates an immutable uniform batch struct from a batch of dense matrices. + * Generates an immutable uniform batch struct from a batch of multi-vectors. */ template inline gko::batch_multi_vector::uniform_batch get_batch_struct( @@ -74,7 +74,7 @@ inline gko::batch_multi_vector::uniform_batch get_batch_struct( /** - * Generates a uniform batch struct from a batch of dense matrices. + * Generates a uniform batch struct from a batch of multi-vectors. */ template inline gko::batch_multi_vector::uniform_batch get_batch_struct( @@ -87,25 +87,6 @@ inline gko::batch_multi_vector::uniform_batch get_batch_struct( } -/** - * Generates an immutable uniform batch struct from a batch of dense matrices - * that may be null. - */ -template -inline gko::batch_multi_vector::uniform_batch -maybe_null_batch_struct(const BatchMultiVector* const op) -{ - if (op) { - return {op->get_const_values(), op->get_num_batch_entries(), - op->get_common_size()[1], - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; - } else { - return {nullptr, 0, 0, 0, 0}; - } -} - - } // namespace host } // namespace kernels } // namespace gko From 903ff5c9d62c29fe072b6bbe97a2b82b6e6e82c9 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 26 Jul 2023 17:16:07 +0200 Subject: [PATCH 138/583] Review updates Co-authored-by: Yu-Hsiang Tsai --- .../base/batch_multi_vector_kernels.hpp.inc | 38 +++++++------- core/base/batch_struct.hpp | 15 +++--- core/test/base/batch_multi_vector.cpp | 51 +++++++++---------- core/test/utils/assertions.hpp | 13 ++--- cuda/base/batch_multi_vector_kernels.cu | 6 ++- cuda/base/batch_struct.hpp | 4 +- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 16 +++--- dpcpp/base/batch_struct.hpp | 4 +- hip/base/batch_multi_vector_kernels.hip.cpp | 7 ++- hip/base/batch_struct.hip.hpp | 4 +- include/ginkgo/core/base/batch_dim.hpp | 4 +- .../ginkgo/core/base/batch_multi_vector.hpp | 24 +-------- reference/base/batch_struct.hpp | 4 +- .../test/base/batch_multi_vector_kernels.cpp | 8 --- test/base/batch_multi_vector_kernels.cpp | 38 ++------------ test/test_install/test_install.cpp | 14 +++++ 16 files changed, 103 insertions(+), 147 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index efbbd323ef6..17a7e125332 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -31,10 +31,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -/** - * Scales the vectors in global or shared memory with a factor of alpha (alpha - * is in global memory or shared memory) - */ template __device__ __forceinline__ void scale( const gko::batch_multi_vector::batch_entry& alpha, @@ -52,9 +48,9 @@ __device__ __forceinline__ void scale( template __global__ -__launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( - const gko::batch_multi_vector::uniform_batch alpha, - const gko::batch_multi_vector::uniform_batch x, Mapping map) + __launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( + const gko::batch_multi_vector::uniform_batch alpha, + const gko::batch_multi_vector::uniform_batch x, Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -83,10 +79,10 @@ __device__ __forceinline__ void add_scaled( template __global__ -__launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( - const gko::batch_multi_vector::uniform_batch alpha, - const gko::batch_multi_vector::uniform_batch x, - const gko::batch_multi_vector::uniform_batch y, Mapping map) + __launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( + const gko::batch_multi_vector::uniform_batch alpha, + const gko::batch_multi_vector::uniform_batch x, + const gko::batch_multi_vector::uniform_batch y, Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -222,11 +218,15 @@ __device__ __forceinline__ void compute_norm2( template -__global__ -__launch_bounds__(default_block_size, sm_multiplier) void compute_norm2_kernel( - const gko::batch_multi_vector::uniform_batch x, - const gko::batch_multi_vector::uniform_batch> - result) +__global__ __launch_bounds__( + default_block_size, + sm_multiplier) void compute_norm2_kernel(const gko::batch_multi_vector:: + uniform_batch + x, + const gko::batch_multi_vector:: + uniform_batch< + remove_complex> + result) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -259,9 +259,9 @@ __device__ __forceinline__ void copy( template __global__ -__launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( - const gko::batch_multi_vector::uniform_batch src, - const gko::batch_multi_vector::uniform_batch dst) + __launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( + const gko::batch_multi_vector::uniform_batch src, + const gko::batch_multi_vector::uniform_batch dst) { for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_entries; batch_id += gridDim.x) { diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index d85c413e691..ea1b3ef3f3f 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -44,21 +44,20 @@ namespace batch_multi_vector { /** - * Encapsulates one matrix from a batch of dense matrices (vectors). + * Encapsulates one matrix from a batch of multi-vectors. */ template struct batch_entry { using value_type = ValueType; ValueType* values; - size_type stride; + int stride; int num_rows; int num_rhs; }; + /** - * A 'simple' structure to store a global uniform batch of dense matrices. - * - * It is uniform in the sense that all matrices in the batch have common sizes. + * A 'simple' structure to store a global uniform batch of multi-vectors. 
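// --- Editor's illustrative sketch (not part of the patch) ---
// The batch_entry helpers below locate one entry inside the flat value array
// by offsetting the pointer with batch_idx * stride * num_rows. For the
// (3 x 2) layout example from the header, entry 1 therefore starts at offset
// 6. Plain standalone arithmetic; no internal Ginkgo headers are assumed.
#include <cassert>

int main()
{
    const double values[] = {1, 2, 1, 2, 1, 2, 3, 4, 3, 4, 3, 4};
    const int stride = 2;  // row stride equals the number of right-hand sides
    const int num_rows = 3;
    const int batch_idx = 1;
    const double* entry = values + batch_idx * stride * num_rows;
    assert(entry[0] == 3.0 && entry[1] == 4.0);
    return 0;
}
// --- end of editor's sketch ---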
*/ template struct uniform_batch { @@ -67,7 +66,7 @@ struct uniform_batch { ValueType* values; size_type num_batch_entries; - size_type stride; + int stride; int num_rows; int num_rhs; @@ -122,8 +121,8 @@ batch_entry(const batch_multi_vector::uniform_batch& batch, template GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::batch_entry -batch_entry(ValueType* const batch_values, const size_type stride, - const int num_rows, const int num_rhs, const size_type batch_idx) +batch_entry(ValueType* const batch_values, const int stride, const int num_rows, + const int num_rhs, const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, num_rhs}; diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 5fbc4d5aa32..e63ed883517 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -84,6 +84,7 @@ class BatchMultiVector : public ::testing::Test { { ASSERT_EQ(m->get_num_batch_entries(), 0); ASSERT_EQ(m->get_common_size(), gko::dim<2>{}); + ASSERT_EQ(m->get_const_values(), nullptr); } std::shared_ptr exec; @@ -100,13 +101,6 @@ TYPED_TEST(BatchMultiVector, CanBeEmpty) } -TYPED_TEST(BatchMultiVector, ReturnsNullValuesArrayWhenEmpty) -{ - auto empty = gko::BatchMultiVector::create(this->exec); - ASSERT_EQ(empty->get_const_values(), nullptr); -} - - TYPED_TEST(BatchMultiVector, KnowsItsSizeAndValues) { ASSERT_NE(this->mtx->get_const_values(), nullptr); @@ -165,10 +159,12 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingData) using size_type = gko::size_type; // clang-format off value_type data[] = { - 1.0, 2.0, -1.0, - 3.0, 4.0, -1.0, - 3.0, 5.0, 1.0, - 5.0, 6.0, -3.0}; + 1.0, 2.0, + -1.0,3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; // clang-format on auto m = gko::BatchMultiVector::create( @@ -192,11 +188,13 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingConstData) using value_type = typename TestFixture::value_type; using size_type = gko::size_type; // clang-format off - const value_type data[] = { - 1.0, 2.0, -1.0, - 3.0, 4.0, -1.0, - 3.0, 5.0, 1.0, - 5.0, 6.0, -3.0}; + value_type data[] = { + 1.0, 2.0, + -1.0,3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; // clang-format on auto m = gko::BatchMultiVector::create_const( @@ -215,7 +213,7 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingConstData) } -TYPED_TEST(BatchMultiVector, CanBeConstructedFromBatchMultiVectorMatrices) +TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -227,12 +225,8 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromBatchMultiVectorMatrices) auto m = gko::BatchMultiVector::create( this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::BatchMultiVector::create( - this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), - mat2.get(), mat1.get(), mat2.get()}); - auto m2 = gko::BatchMultiVector::create(this->exec, 3, m.get()); - GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); + this->assert_equal_to_original_mtx(m.get()); } @@ -255,7 +249,7 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatricesByDuplication) } -TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatrices) +TYPED_TEST(BatchMultiVector, CanBeConstructedFromBatchMultiVectorMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -264,11 +258,15 @@ TYPED_TEST(BatchMultiVector, 
CanBeConstructedFromDenseMatrices) this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::BatchMultiVector::create( this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = gko::BatchMultiVector::create( + this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); - this->assert_equal_to_original_mtx(m.get()); + auto m2 = gko::BatchMultiVector::create(this->exec, 3, m.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } @@ -356,6 +354,7 @@ TYPED_TEST(BatchMultiVector, CanBeUnbatchedIntoDenseMatrices) auto dense_mats = this->mtx->unbatch(); + ASSERT_EQ(dense_mats.size(), 2); GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); GKO_ASSERT_MTX_NEAR(dense_mats[1].get(), mat2.get(), 0.); } @@ -380,8 +379,8 @@ TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixData) ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index 8e825a32d4f..44da77244f7 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -323,21 +323,18 @@ ::testing::AssertionResult batch_matrices_near_impl( const MatrixData2& second, double tolerance) { std::vector err; - std::vector err_flag; for (size_type b = 0; b < first.size(); ++b) { - auto num_rows = first[b].size[0]; - auto num_cols = first[b].size[1]; - if (num_rows != second[b].size[0] || num_cols != second[b].size[1]) { + if (first.size() != second.size()) { return ::testing::AssertionFailure() << "Expected matrices of equal size\n\t" << first_expression - << " is of size [" << num_rows << " x " << num_cols - << "]\n\t" << second_expression << " is of size [" - << second[b].size[0] << " x " << second[b].size[1] << "]" + << " is of size [" << first[b].size[0] << " x " + << first[b].size[1] << "]\n\t" << second_expression + << " is of size [" << second[b].size[0] << " x " + << second[b].size[1] << "]" << " for batch " << b; } err.push_back(detail::get_relative_error(first[b], second[b])); - err_flag.push_back(err.back() <= tolerance); } auto bat = std::find_if(err.begin(), err.end(), diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index 05e08be0adb..3fd80a2aa41 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -67,14 +67,16 @@ namespace batch_multi_vector { constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; +// clang-format off + // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -// force-top: on + #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" -// force-top: off #include "common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc" +// clang-format on } // namespace batch_multi_vector } // namespace cuda diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index f9a50376362..d9907b41531 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -66,7 +66,7 @@ inline gko::batch_multi_vector::uniform_batch> get_batch_struct(const BatchMultiVector* const op) { return {as_cuda_type(op->get_const_values()), op->get_num_batch_entries(), - op->get_common_size()[1], + 
static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; } @@ -79,7 +79,7 @@ inline gko::batch_multi_vector::uniform_batch> get_batch_struct(BatchMultiVector* const op) { return {as_cuda_type(op->get_values()), op->get_num_batch_entries(), - op->get_common_size()[1], + static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; } diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 2c48970d13d..e27b3fc810f 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -86,7 +86,7 @@ void scale(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size if (alpha->get_common_size()[1] == 1) { - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); @@ -98,7 +98,7 @@ void scale(std::shared_ptr exec, }); }); } else { - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); @@ -136,7 +136,7 @@ void add_scaled(std::shared_ptr exec, const auto x_ub = get_batch_struct(x); const auto y_ub = get_batch_struct(y); if (alpha->get_common_size()[1] == 1) { - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); @@ -149,7 +149,7 @@ void add_scaled(std::shared_ptr exec, }); }); } else { - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); @@ -187,7 +187,7 @@ void compute_dot(std::shared_ptr exec, const dim3 grid(num_batches); // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [= ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( @@ -225,7 +225,7 @@ void compute_conj_dot(std::shared_ptr exec, const dim3 block(group_size); const dim3 grid(num_batches); - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [= ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( @@ -262,7 +262,7 @@ void compute_norm2(std::shared_ptr exec, const dim3 block(group_size); const dim3 grid(num_batches); - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [= ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( @@ -296,7 +296,7 @@ void copy(std::shared_ptr exec, const dim3 block(group_size); const dim3 grid(num_batches); - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 
1a83fad020c..c9ee5800b3e 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -65,7 +65,7 @@ inline gko::batch_multi_vector::uniform_batch get_batch_struct( const BatchMultiVector* const op) { return {op->get_const_values(), op->get_num_batch_entries(), - op->get_common_size()[1], + static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; } @@ -79,7 +79,7 @@ inline gko::batch_multi_vector::uniform_batch get_batch_struct( BatchMultiVector* const op) { return {op->get_values(), op->get_num_batch_entries(), - op->get_common_size()[1], + static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; } diff --git a/hip/base/batch_multi_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp index 096c5e8a5d3..40e828b5d45 100644 --- a/hip/base/batch_multi_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -69,14 +69,17 @@ constexpr auto default_block_size = 256; constexpr int sm_multiplier = 4; +// clang-format off + // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -// force-top: on + #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" -// force-top: off #include "common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc" +// clang-format on + } // namespace batch_multi_vector } // namespace hip diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index bff659838bd..3171e7e1df8 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -66,7 +66,7 @@ inline gko::batch_multi_vector::uniform_batch> get_batch_struct(const BatchMultiVector* const op) { return {as_hip_type(op->get_const_values()), op->get_num_batch_entries(), - op->get_common_size()[1], + static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; } @@ -79,7 +79,7 @@ inline gko::batch_multi_vector::uniform_batch> get_batch_struct(BatchMultiVector* const op) { return {as_hip_type(op->get_values()), op->get_num_batch_entries(), - op->get_common_size()[1], + static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; } diff --git a/include/ginkgo/core/base/batch_dim.hpp b/include/ginkgo/core/base/batch_dim.hpp index 37ce5993220..c52b732f610 100644 --- a/include/ginkgo/core/base/batch_dim.hpp +++ b/include/ginkgo/core/base/batch_dim.hpp @@ -102,7 +102,7 @@ struct batch_dim { /** - * Checks if two batch dim objects are different. + * Checks if two batch_dim objects are different. * * @tparam Dimensionality number of dimensions of the dim objects * @tparam DimensionType datatype used to represent each dimension @@ -123,7 +123,7 @@ struct batch_dim { * Creates a batch_dim object which stores a uniform size for all batch * entries. * - * @param num_batch_entries number of batch entries to be stored + * @param num_batch_entries the number of batch entries to be stored * @param common_size the common size of all the batch entries stored * * @note Use this constructor when uniform batches need to be stored. 
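// --- Editor's illustrative sketch (not part of the patch) ---
// A uniform batch_dim stores one common size plus the number of batch entries;
// two batch_dim objects compare equal exactly when both of those match (using
// the comparison operators declared in this header). Sizes are placeholders.
#include <ginkgo/ginkgo.hpp>

void batch_dim_sketch()
{
    auto a = gko::batch_dim<2>(3, gko::dim<2>{4, 2});
    auto b = gko::batch_dim<2>(3, gko::dim<2>{4, 2});
    auto c = gko::batch_dim<2>(2, gko::dim<2>{4, 2});
    bool same = (a == b);       // true: same count and same common size
    bool different = (a != c);  // true: different number of batch entries
    // a.get_num_batch_entries() == 3, a.get_common_size() == gko::dim<2>{4, 2}
}
// --- end of editor's sketch ---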
diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index ac4a2feb419..b91c50966a1 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -170,26 +170,6 @@ class BatchMultiVector */ dim<2> get_common_size() const { return batch_size_.get_common_size(); } - /** - * Returns a pointer to the array of values of the beginning of the batched - * multi-vector. - * - * @return the pointer to the array of values - */ - value_type* get_values() noexcept { return values_.get_data(); } - - /** - * @copydoc get_values() - * - * @note This is the constant version of the function, which can be - * significantly more memory efficient than the non-constant version, - * so always prefer this version. - */ - const value_type* get_const_values() const noexcept - { - return values_.get_const_data(); - } - /** * Returns a pointer to the array of values of the multi-vector for a * specific batch entry. @@ -198,7 +178,7 @@ class BatchMultiVector * * @return the pointer to the array of values */ - value_type* get_values(size_type batch_id) noexcept + value_type* get_values(size_type batch_id = 0) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_entries()); return values_.get_data() + @@ -212,7 +192,7 @@ class BatchMultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values(size_type batch_id) const noexcept + const value_type* get_const_values(size_type batch_id = 0) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_entries()); return values_.get_const_data() + diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index ed1350dc366..41262be1d48 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -67,7 +67,7 @@ inline gko::batch_multi_vector::uniform_batch get_batch_struct( const BatchMultiVector* const op) { return {op->get_const_values(), op->get_num_batch_entries(), - op->get_common_size()[1], + static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; } @@ -81,7 +81,7 @@ inline gko::batch_multi_vector::uniform_batch get_batch_struct( BatchMultiVector* const op) { return {op->get_values(), op->get_num_batch_entries(), - op->get_common_size()[1], + static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; } diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index 445cdedb73f..f6ae66d8249 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -127,7 +127,6 @@ class BatchMultiVector : public ::testing::Test { std::ranlux48 rand_engine; }; - TYPED_TEST_SUITE(BatchMultiVector, gko::test::ValueTypes); @@ -137,7 +136,6 @@ TYPED_TEST(BatchMultiVector, ScalesData) using T = typename TestFixture::value_type; auto alpha = gko::batch_initialize( {{{2.0, -2.0, 1.5}}, {{3.0, -1.0, 0.25}}}, this->exec); - auto ualpha = alpha->unbatch(); this->mtx_0->scale(alpha.get()); @@ -155,7 +153,6 @@ TYPED_TEST(BatchMultiVector, ScalesDataWithScalar) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); - auto ualpha = alpha->unbatch(); this->mtx_1->scale(alpha.get()); @@ -174,7 +171,6 @@ 
TYPED_TEST(BatchMultiVector, ScalesDataWithStride) using T = typename TestFixture::value_type; auto alpha = gko::batch_initialize( {{{2.0, -2.0, -1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto ualpha = alpha->unbatch(); this->mtx_1->scale(alpha.get()); @@ -193,7 +189,6 @@ TYPED_TEST(BatchMultiVector, AddsScaled) using T = typename TestFixture::value_type; auto alpha = gko::batch_initialize( {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto ualpha = alpha->unbatch(); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); @@ -211,7 +206,6 @@ TYPED_TEST(BatchMultiVector, AddsScaledWithScalar) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); - auto ualpha = alpha->unbatch(); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); @@ -241,7 +235,6 @@ TYPED_TEST(BatchMultiVector, ComputesDot) using T = typename TestFixture::value_type; auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - auto ures = result->unbatch(); this->mtx_0->compute_dot(this->mtx_1.get(), result.get()); @@ -286,7 +279,6 @@ TYPED_TEST(BatchMultiVector, ComputesConjDot) using T = typename TestFixture::value_type; auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - auto ures = result->unbatch(); this->mtx_0->compute_conj_dot(this->mtx_1.get(), result.get()); diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index 631b9a10c24..015adbce798 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -82,46 +82,16 @@ class BatchMultiVector : public CommonTestFixture { alpha = gko::batch_initialize(batch_size, {2.0}, ref); beta = gko::batch_initialize(batch_size, {-0.5}, ref); } - dx = Mtx::create(exec); - dx->copy_from(x.get()); - dy = Mtx::create(exec); - dy->copy_from(y.get()); - dalpha = Mtx::create(exec); - dalpha->copy_from(alpha.get()); - dbeta = gko::clone(exec, beta.get()); + dx = gko::clone(exec, x); + dy = gko::clone(exec, y); + dalpha = gko::clone(exec, alpha); + dbeta = gko::clone(exec, beta); expected = Mtx::create( ref, gko::batch_dim<2>(batch_size, gko::dim<2>{1, num_vecs})); dresult = Mtx::create( exec, gko::batch_dim<2>(batch_size, gko::dim<2>{1, num_vecs})); } - void set_up_apply_data(const int p = 1) - { - const int m = 35, n = 15; - x = gen_mtx(batch_size, m, n); - c_x = gen_mtx(batch_size, m, n); - y = gen_mtx(batch_size, n, p); - expected = gen_mtx(batch_size, m, p); - alpha = gko::batch_initialize(batch_size, {2.0}, ref); - beta = gko::batch_initialize(batch_size, {-1.0}, ref); - square = gen_mtx(batch_size, x->get_common_size()[0], - x->get_common_size()[0]); - dx = Mtx::create(exec); - dx->copy_from(x.get()); - dc_x = ComplexMtx::create(exec); - dc_x->copy_from(c_x.get()); - dy = Mtx::create(exec); - dy->copy_from(y.get()); - dresult = Mtx::create(exec); - dresult->copy_from(expected.get()); - dalpha = Mtx::create(exec); - dalpha->copy_from(alpha.get()); - dbeta = Mtx::create(exec); - dbeta->copy_from(beta.get()); - dsquare = Mtx::create(exec); - dsquare->copy_from(square.get()); - } - std::ranlux48 rand_engine; const size_t batch_size = 11; diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index 2016f00dade..2467e99f62b 100644 --- a/test/test_install/test_install.cpp +++ b/test/test_install/test_install.cpp @@ -210,6 +210,20 @@ int main() array_type test; } + // core/base/batch_dim.hpp + { + using type1 = int; 
+ auto common_size = gko::dim<2>{4, 2}; + auto test = gko::batch_dim<2, type1>{2, common_size}; + } + + // core/base/batch_multi_vector.hpp + { + using type1 = float; + using batch_multi_vector_type = gko::BatchMultiVector; + auto test = batch_multi_vector_type::create(exec); + } + // core/base/combination.hpp { using type1 = int; From 97f3eaafd9f2f80e19270235981ac5f0e76cbe7b Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Thu, 27 Jul 2023 07:37:51 +0000 Subject: [PATCH 139/583] Format files Co-authored-by: Pratik Nayak --- .../base/batch_multi_vector_kernels.hpp.inc | 34 +++++----- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 65 ++++++++++--------- 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index 17a7e125332..3df2bc14c84 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -48,9 +48,9 @@ __device__ __forceinline__ void scale( template __global__ - __launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( - const gko::batch_multi_vector::uniform_batch alpha, - const gko::batch_multi_vector::uniform_batch x, Mapping map) +__launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( + const gko::batch_multi_vector::uniform_batch alpha, + const gko::batch_multi_vector::uniform_batch x, Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -79,10 +79,10 @@ __device__ __forceinline__ void add_scaled( template __global__ - __launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( - const gko::batch_multi_vector::uniform_batch alpha, - const gko::batch_multi_vector::uniform_batch x, - const gko::batch_multi_vector::uniform_batch y, Mapping map) +__launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( + const gko::batch_multi_vector::uniform_batch alpha, + const gko::batch_multi_vector::uniform_batch x, + const gko::batch_multi_vector::uniform_batch y, Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -218,15 +218,11 @@ __device__ __forceinline__ void compute_norm2( template -__global__ __launch_bounds__( - default_block_size, - sm_multiplier) void compute_norm2_kernel(const gko::batch_multi_vector:: - uniform_batch - x, - const gko::batch_multi_vector:: - uniform_batch< - remove_complex> - result) +__global__ +__launch_bounds__(default_block_size, sm_multiplier) void compute_norm2_kernel( + const gko::batch_multi_vector::uniform_batch x, + const gko::batch_multi_vector::uniform_batch> + result) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -259,9 +255,9 @@ __device__ __forceinline__ void copy( template __global__ - __launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( - const gko::batch_multi_vector::uniform_batch src, - const gko::batch_multi_vector::uniform_batch dst) +__launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( + const gko::batch_multi_vector::uniform_batch src, + const gko::batch_multi_vector::uniform_batch dst) { for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_entries; batch_id += gridDim.x) { diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index e27b3fc810f..85870a91df7 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp 
+++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -189,17 +189,18 @@ void compute_dot(std::shared_ptr exec, // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_entry(x_ub, group_id); - const auto y_b = batch::batch_entry(y_ub, group_id); - const auto res_b = batch::batch_entry(res_ub, group_id); - compute_gen_dot_product_kernel(x_b, y_b, res_b, item_ct1, - [](auto val) { return val; }); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::batch_entry(x_ub, group_id); + const auto y_b = batch::batch_entry(y_ub, group_id); + const auto res_b = batch::batch_entry(res_ub, group_id); + compute_gen_dot_product_kernel( + x_b, y_b, res_b, item_ct1, + [](auto val) { return val; }); + }); }); } @@ -227,18 +228,18 @@ void compute_conj_dot(std::shared_ptr exec, exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_entry(x_ub, group_id); - const auto y_b = batch::batch_entry(y_ub, group_id); - const auto res_b = batch::batch_entry(res_ub, group_id); - compute_gen_dot_product_kernel( - x_b, y_b, res_b, item_ct1, - [](auto val) { return conj(val); }); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::batch_entry(x_ub, group_id); + const auto y_b = batch::batch_entry(y_ub, group_id); + const auto res_b = batch::batch_entry(res_ub, group_id); + compute_gen_dot_product_kernel( + x_b, y_b, res_b, item_ct1, + [](auto val) { return conj(val); }); + }); }); } @@ -264,15 +265,15 @@ void compute_norm2(std::shared_ptr exec, exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_entry(x_ub, group_id); - const auto res_b = batch::batch_entry(res_ub, group_id); - compute_norm2_kernel(x_b, res_b, item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::batch_entry(x_ub, group_id); + const auto res_b = batch::batch_entry(res_ub, group_id); + compute_norm2_kernel(x_b, res_b, item_ct1); + }); }); } From c808852972f27fbe29bf4ce6c03b15979a1795c5 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 27 Jul 2023 10:31:56 +0200 Subject: [PATCH 140/583] Update get_values and add test --- core/test/base/batch_multi_vector.cpp | 8 +++++ .../ginkgo/core/base/batch_multi_vector.hpp | 29 +++++++++++++++++-- test/test_install/test_install.cpp | 3 +- 3 files changed, 35 insertions(+), 5 
deletions(-) diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index e63ed883517..43d3a1ddac6 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -108,6 +108,14 @@ TYPED_TEST(BatchMultiVector, KnowsItsSizeAndValues) } +TYPED_TEST(BatchMultiVector, CanGetValuesForEntry) +{ + using value_type = typename TestFixture::value_type; + + ASSERT_EQ(this->mtx->get_values_for_entry(1)[0], value_type{1.0}); +} + + TYPED_TEST(BatchMultiVector, CanBeCopied) { auto mtx_copy = gko::BatchMultiVector::create(this->exec); diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index b91c50966a1..f7c8258121f 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -170,6 +170,28 @@ class BatchMultiVector */ dim<2> get_common_size() const { return batch_size_.get_common_size(); } + /** + * Returns a pointer to the array of values of the multi-vector + * + * @return the pointer to the array of values + */ + value_type* get_values(size_type batch_id = 0) noexcept + { + return values_.get_data(); + } + + /** + * @copydoc get_values(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values() const noexcept + { + return values_.get_const_data(); + } + /** * Returns a pointer to the array of values of the multi-vector for a * specific batch entry. @@ -178,7 +200,7 @@ class BatchMultiVector * * @return the pointer to the array of values */ - value_type* get_values(size_type batch_id = 0) noexcept + value_type* get_values_for_entry(size_type batch_id) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_entries()); return values_.get_data() + @@ -186,13 +208,14 @@ class BatchMultiVector } /** - * @copydoc get_values(size_type) + * @copydoc get_values_at_entry(size_type) * * @note This is the constant version of the function, which can be * significantly more memory efficient than the non-constant version, * so always prefer this version. 
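 * For example (a minimal sketch, assuming a multi-vector `x` that holds at
 * least two batch entries):
 *   auto* entry0_vals = x->get_values_for_entry(0);
 *   const auto* entry1_vals = x->get_const_values_for_entry(1);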
*/ - const value_type* get_const_values(size_type batch_id = 0) const noexcept + const value_type* get_const_values_for_entry( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_entries()); return values_.get_const_data() + diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index 2467e99f62b..ed62e3ca3d3 100644 --- a/test/test_install/test_install.cpp +++ b/test/test_install/test_install.cpp @@ -213,8 +213,7 @@ int main() // core/base/batch_dim.hpp { using type1 = int; - auto common_size = gko::dim<2>{4, 2}; - auto test = gko::batch_dim<2, type1>{2, common_size}; + auto test = gko::batch_dim<2, type1>{}; } // core/base/batch_multi_vector.hpp From 17d54c38cc0b2ea629c16007e3427d03e225f35f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 27 Jul 2023 14:06:37 +0200 Subject: [PATCH 141/583] Fix read bug and add test --- core/base/batch_multi_vector.cpp | 8 +-- core/test/base/batch_multi_vector.cpp | 57 +++++++++---------- .../ginkgo/core/base/batch_lin_op_helpers.hpp | 18 ------ 3 files changed, 29 insertions(+), 54 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 3784c6645d7..9b5b908f5d1 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -269,12 +269,8 @@ void read_impl(MatrixType* mtx, const std::vector& data) MatrixType::create(mtx->get_executor()->get_master(), batch_size); tmp->fill(zero()); for (size_type b = 0; b < data.size(); ++b) { - size_type ind = 0; - for (size_type row = 0; row < data[b].size[0]; ++row) { - for (size_type col = 0; col < data[b].size[1]; ++col) { - tmp->at(b, row, col) = data[b].nonzeros[ind].value; - ++ind; - } + for (const auto& elem : data[b].nonzeros) { + tmp->at(b, elem.row, elem.column) = elem.value; } } tmp->move_to(mtx); diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 43d3a1ddac6..a201a80f741 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -397,6 +397,33 @@ TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixData) } +TYPED_TEST(BatchMultiVector, CanBeReadFromSparseMatrixData) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::BatchMultiVector::create(this->exec); + // clang-format off + m->read({gko::matrix_data{{2, 2}, + {{0, 0, 1.0}, + {0, 1, 3.0}, + {1, 1, 5.0}}}, + gko::matrix_data{{2, 2}, + {{0, 0, -1.0}, + {0, 1, 0.5}, + {1, 1, 9.0}}}}); + // clang-format on + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); +} + + TYPED_TEST(BatchMultiVector, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; @@ -422,33 +449,3 @@ TYPED_TEST(BatchMultiVector, GeneratesCorrectMatrixData) EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0})); EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0})); } - - -TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixAssemblyData) -{ - using value_type = typename TestFixture::value_type; - auto m = gko::BatchMultiVector::create(this->exec); - gko::matrix_assembly_data data1(gko::dim<2>{2, 2}); - data1.set_value(0, 0, 1.0); - data1.set_value(0, 1, 3.0); - 
data1.set_value(1, 0, 0.0); - data1.set_value(1, 1, 5.0); - gko::matrix_assembly_data data2(gko::dim<2>{2, 2}); - data2.set_value(0, 0, 2.0); - data2.set_value(0, 1, 1.0); - data2.set_value(1, 0, 5.0); - data2.set_value(1, 1, 4.0); - auto data = std::vector>{data1, data2}; - - m->read(data); - - ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); - EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); - EXPECT_EQ(m->at(1, 1, 0), value_type{5.0}); - EXPECT_EQ(m->at(1, 0, 1), value_type{1.0}); - EXPECT_EQ(m->at(1, 1, 1), value_type{4.0}); -} diff --git a/include/ginkgo/core/base/batch_lin_op_helpers.hpp b/include/ginkgo/core/base/batch_lin_op_helpers.hpp index ecb8bcc4556..6dd9297614a 100644 --- a/include/ginkgo/core/base/batch_lin_op_helpers.hpp +++ b/include/ginkgo/core/base/batch_lin_op_helpers.hpp @@ -75,24 +75,6 @@ class BatchReadableFromMatrixData { */ virtual void read( const std::vector>& data) = 0; - - /** - * Reads a matrix from a std::vector of matrix_assembly_data objects. - * - * @param data the std::vector of matrix_assembly_data objects - */ - void read(const std::vector>& - assembly_data) - { - auto mat_data = std::vector>( - assembly_data.size()); - size_type ind = 0; - for (const auto& i : assembly_data) { - mat_data[ind] = i.get_ordered_data(); - ++ind; - } - this->read(mat_data); - } }; From 367d46939607f7ee230e2ffa4e404b9fb1165686 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 28 Jul 2023 11:44:33 +0200 Subject: [PATCH 142/583] Review updates. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Thomas Grützmacher Co-authored-by: Yu-Hsiang Tsai Co-authored-by: Marcel Koch --- .../base/batch_multi_vector_kernels.hpp.inc | 80 ++++++++------ core/base/batch_multi_vector.cpp | 87 ++++++++++++++- core/test/base/batch_multi_vector.cpp | 13 +++ cuda/base/batch_multi_vector_kernels.cu | 2 +- cuda/base/batch_struct.hpp | 2 +- dpcpp/base/batch_struct.hpp | 2 +- hip/base/batch_multi_vector_kernels.hip.cpp | 2 +- hip/base/batch_struct.hip.hpp | 2 +- include/ginkgo/core/base/batch_dim.hpp | 15 ++- .../ginkgo/core/base/batch_lin_op_helpers.hpp | 1 + .../ginkgo/core/base/batch_multi_vector.hpp | 101 ++++-------------- .../test/base/batch_multi_vector_kernels.cpp | 16 +-- test/base/batch_multi_vector_kernels.cpp | 25 ++++- 13 files changed, 204 insertions(+), 144 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index 3df2bc14c84..5e63f451d19 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -47,10 +47,15 @@ __device__ __forceinline__ void scale( } template -__global__ -__launch_bounds__(default_block_size, sm_multiplier) void scale_kernel( - const gko::batch_multi_vector::uniform_batch alpha, - const gko::batch_multi_vector::uniform_batch x, Mapping map) +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void scale_kernel(const gko::batch_multi_vector:: + uniform_batch + alpha, + const gko::batch_multi_vector:: + uniform_batch + x, + Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -78,11 +83,20 @@ __device__ __forceinline__ void add_scaled( } template -__global__ 
-__launch_bounds__(default_block_size, sm_multiplier) void add_scaled_kernel( - const gko::batch_multi_vector::uniform_batch alpha, - const gko::batch_multi_vector::uniform_batch x, - const gko::batch_multi_vector::uniform_batch y, Mapping map) +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void add_scaled_kernel(const gko::batch_multi_vector:: + uniform_batch< + const ValueType> + alpha, + const gko::batch_multi_vector:: + uniform_batch< + const ValueType> + x, + const gko::batch_multi_vector:: + uniform_batch + y, + Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -139,24 +153,12 @@ __device__ __forceinline__ void compute_gen_dot_product( template -__global__ __launch_bounds__( - default_block_size, - sm_multiplier) void compute_gen_dot_product_kernel(const gko:: - batch_multi_vector:: - uniform_batch< - const ValueType> - x, - const gko:: - batch_multi_vector:: - uniform_batch< - const ValueType> - y, - const gko:: - batch_multi_vector:: - uniform_batch< - ValueType> - result, - Mapping map) +__global__ + __launch_bounds__(default_block_size, sm_oversubscription) void compute_gen_dot_product_kernel( + const gko::batch_multi_vector::uniform_batch x, + const gko::batch_multi_vector::uniform_batch y, + const gko::batch_multi_vector::uniform_batch result, + Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -218,11 +220,19 @@ __device__ __forceinline__ void compute_norm2( template -__global__ -__launch_bounds__(default_block_size, sm_multiplier) void compute_norm2_kernel( - const gko::batch_multi_vector::uniform_batch x, - const gko::batch_multi_vector::uniform_batch> - result) +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void compute_norm2_kernel(const gko:: + batch_multi_vector:: + uniform_batch< + const ValueType> + x, + const gko:: + batch_multi_vector:: + uniform_batch< + remove_complex< + ValueType>> + result) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; batch_id += gridDim.x) { @@ -255,9 +265,9 @@ __device__ __forceinline__ void copy( template __global__ -__launch_bounds__(default_block_size, sm_multiplier) void copy_kernel( - const gko::batch_multi_vector::uniform_batch src, - const gko::batch_multi_vector::uniform_batch dst) + __launch_bounds__(default_block_size, sm_oversubscription) void copy_kernel( + const gko::batch_multi_vector::uniform_batch src, + const gko::batch_multi_vector::uniform_batch dst) { for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_entries; batch_id += gridDim.x) { diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 9b5b908f5d1..ac47260d82d 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -65,6 +65,85 @@ GKO_REGISTER_OPERATION(copy, batch_multi_vector::copy); } // namespace } // namespace batch_multi_vector +namespace detail { + + +template +batch_dim<2> compute_batch_size( + const std::vector*>& matrices) +{ + auto common_size = matrices[0]->get_size(); + for (size_type i = 1; i < matrices.size(); ++i) { + GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); + } + return batch_dim<2>{matrices.size(), common_size}; +} + + +} // namespace detail + + +template +BatchMultiVector::BatchMultiVector( + std::shared_ptr exec, const batch_dim<2>& size) + : EnablePolymorphicObject>(exec), + batch_size_(size), + values_(exec, compute_num_elems(size)) +{} + + 
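// A minimal usage sketch for this constructor (reached through the
// corresponding create method, assuming an executor `exec` has already been
// created). All batch entries share one contiguous value array, e.g.
//   auto size = gko::batch_dim<2>(2, gko::dim<2>{3, 1});
//   auto x = gko::BatchMultiVector<float>::create(exec, size);
//   // x->get_num_stored_elements() is then 2 * 3 * 1 = 6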
+template +BatchMultiVector::BatchMultiVector( + std::shared_ptr exec, + const std::vector*>& matrices) + : EnablePolymorphicObject>(exec), + batch_size_{detail::compute_batch_size(matrices)}, + values_(exec, compute_num_elems(batch_size_)) +{ + for (size_type i = 0; i < this->get_num_batch_entries(); ++i) { + auto local_exec = matrices[i]->get_executor(); + exec->copy_from( + local_exec.get(), matrices[i]->get_num_stored_elements(), + matrices[i]->get_const_values(), + this->get_values() + this->get_size().get_cumulative_offset(i)); + } +} + + +template +BatchMultiVector::BatchMultiVector( + std::shared_ptr exec, size_type num_duplications, + const matrix::Dense* input) + : BatchMultiVector( + exec, batch_dim<2>(num_duplications, input->get_size())) +{ + size_type offset = 0; + for (size_type i = 0; i < num_duplications; ++i) { + exec->copy_from(input->get_executor().get(), + input->get_num_stored_elements(), + input->get_const_values(), this->get_values() + offset); + offset += input->get_num_stored_elements(); + } +} + + +template +BatchMultiVector::BatchMultiVector( + std::shared_ptr exec, size_type num_duplications, + const BatchMultiVector* input) + : BatchMultiVector( + exec, batch_dim<2>(input->get_num_batch_entries() * num_duplications, + input->get_common_size())) +{ + size_type offset = 0; + for (size_type i = 0; i < num_duplications; ++i) { + exec->copy_from(input->get_executor().get(), + input->get_num_stored_elements(), + input->get_const_values(), this->get_values() + offset); + offset += input->get_num_stored_elements(); + } +} + template std::unique_ptr> @@ -102,12 +181,12 @@ template std::unique_ptr> BatchMultiVector::create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - gko::detail::const_array_view&& values) + detail::const_array_view&& values) { // cast const-ness away, but return a const object afterwards, // so we can ensure that no modifications take place. 
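    // A minimal usage sketch (assuming `view` is a detail::const_array_view
    // over existing values whose length matches `sizes`):
    //   auto x = BatchMultiVector<float>::create_const(exec, sizes,
    //                                                  std::move(view));
    // The returned object only grants read access to the wrapped data.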
return std::unique_ptr(new BatchMultiVector{ - exec, sizes, gko::detail::array_const_cast(std::move(values))}); + exec, sizes, detail::array_const_cast(std::move(values))}); } @@ -285,7 +364,7 @@ void BatchMultiVector::read(const std::vector& data) template -void BatchMultiVector::read(const std::vector& data) +void BatchMultiVector::read(const std::vector& data) { read_impl(this, data); } @@ -320,7 +399,7 @@ void BatchMultiVector::write(std::vector& data) const template -void BatchMultiVector::write(std::vector& data) const +void BatchMultiVector::write(std::vector& data) const { write_impl(this, data); } diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index a201a80f741..486a8301cf6 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -97,6 +97,7 @@ TYPED_TEST_SUITE(BatchMultiVector, gko::test::ValueTypes); TYPED_TEST(BatchMultiVector, CanBeEmpty) { auto empty = gko::BatchMultiVector::create(this->exec); + this->assert_empty(empty.get()); } @@ -104,6 +105,7 @@ TYPED_TEST(BatchMultiVector, CanBeEmpty) TYPED_TEST(BatchMultiVector, KnowsItsSizeAndValues) { ASSERT_NE(this->mtx->get_const_values(), nullptr); + this->assert_equal_to_original_mtx(this->mtx.get()); } @@ -119,7 +121,9 @@ TYPED_TEST(BatchMultiVector, CanGetValuesForEntry) TYPED_TEST(BatchMultiVector, CanBeCopied) { auto mtx_copy = gko::BatchMultiVector::create(this->exec); + mtx_copy->copy_from(this->mtx.get()); + this->assert_equal_to_original_mtx(this->mtx.get()); this->mtx->at(0, 0, 0) = 7; this->mtx->at(0, 1) = 7; @@ -130,7 +134,9 @@ TYPED_TEST(BatchMultiVector, CanBeCopied) TYPED_TEST(BatchMultiVector, CanBeMoved) { auto mtx_copy = gko::BatchMultiVector::create(this->exec); + this->mtx->move_to(mtx_copy.get()); + this->assert_equal_to_original_mtx(mtx_copy.get()); } @@ -138,6 +144,7 @@ TYPED_TEST(BatchMultiVector, CanBeMoved) TYPED_TEST(BatchMultiVector, CanBeCloned) { auto mtx_clone = this->mtx->clone(); + this->assert_equal_to_original_mtx( dynamic_castmtx.get())>(mtx_clone.get())); } @@ -146,6 +153,7 @@ TYPED_TEST(BatchMultiVector, CanBeCloned) TYPED_TEST(BatchMultiVector, CanBeCleared) { this->mtx->clear(); + this->assert_empty(this->mtx.get()); } @@ -153,6 +161,7 @@ TYPED_TEST(BatchMultiVector, CanBeCleared) TYPED_TEST(BatchMultiVector, CanBeConstructedWithSize) { using size_type = gko::size_type; + auto m = gko::BatchMultiVector::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 4))); @@ -281,6 +290,7 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromBatchMultiVectorMatrices) TYPED_TEST(BatchMultiVector, CanBeListConstructed) { using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( {{1.0, 2.0}, {1.0, 3.0}}, this->exec); @@ -296,6 +306,7 @@ TYPED_TEST(BatchMultiVector, CanBeListConstructed) TYPED_TEST(BatchMultiVector, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( 2, I({1.0, 2.0}), this->exec); @@ -312,6 +323,7 @@ TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; using T = value_type; + auto m = gko::batch_initialize>( {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, @@ -401,6 +413,7 @@ TYPED_TEST(BatchMultiVector, CanBeReadFromSparseMatrixData) { using value_type = typename TestFixture::value_type; auto m = gko::BatchMultiVector::create(this->exec); + // clang-format off 
m->read({gko::matrix_data{{2, 2}, {{0, 0, 1.0}, diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index 3fd80a2aa41..3e44b006552 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -65,7 +65,7 @@ namespace batch_multi_vector { constexpr auto default_block_size = 256; -constexpr int sm_multiplier = 4; +constexpr int sm_oversubscription = 4; // clang-format off diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index d9907b41531..70bc42aecac 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -51,7 +51,7 @@ namespace cuda { /** @file batch_struct.hpp * * Helper functions to generate a batch struct from a batch LinOp, - * while also shallow-casting to the requried CUDA scalar type. + * while also shallow-casting to the required CUDA scalar type. * * A specialization is needed for every format of every kind of linear algebra * object. These are intended to be called on the host. diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index c9ee5800b3e..4f8d8aa0350 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -50,7 +50,7 @@ namespace dpcpp { /** @file batch_struct.hpp * * Helper functions to generate a batch struct from a batch LinOp, - * while also shallow-casting to the requried DPCPP scalar type. + * while also shallow-casting to the required DPCPP scalar type. * * A specialization is needed for every format of every kind of linear algebra * object. These are intended to be called on the host. diff --git a/hip/base/batch_multi_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp index 40e828b5d45..bb465ac7709 100644 --- a/hip/base/batch_multi_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -66,7 +66,7 @@ namespace batch_multi_vector { constexpr auto default_block_size = 256; -constexpr int sm_multiplier = 4; +constexpr int sm_oversubscription = 4; // clang-format off diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index 3171e7e1df8..55f81f7eaff 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -51,7 +51,7 @@ namespace hip { /** @file batch_struct.hpp * * Helper functions to generate a batch struct from a batch LinOp, - * while also shallow-casting to the requried Hip scalar type. + * while also shallow-casting to the required Hip scalar type. * * A specialization is needed for every format of every kind of linear algebra * object. These are intended to be called on the host. diff --git a/include/ginkgo/core/base/batch_dim.hpp b/include/ginkgo/core/base/batch_dim.hpp index c52b732f610..6d840f2ee86 100644 --- a/include/ginkgo/core/base/batch_dim.hpp +++ b/include/ginkgo/core/base/batch_dim.hpp @@ -119,6 +119,14 @@ struct batch_dim { } + /** + * The default empty constructor + */ + batch_dim() + : common_size_(dim{}), + num_batch_entries_(0) + {} + /** * Creates a batch_dim object which stores a uniform size for all batch * entries. @@ -128,9 +136,8 @@ struct batch_dim { * * @note Use this constructor when uniform batches need to be stored. 
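 * For example (a minimal sketch), a batch of two entries that all share the
 * common size 4-by-2 is described by
 *   auto size = gko::batch_dim<2>(2, gko::dim<2>{4, 2});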
*/ - explicit batch_dim(const size_type num_batch_entries = 0, - const dim& common_size = - dim{}) + explicit batch_dim(const size_type num_batch_entries, + const dim& common_size) : common_size_(common_size), num_batch_entries_(num_batch_entries) {} @@ -154,7 +161,7 @@ inline batch_dim<2, DimensionType> transpose( const batch_dim<2, DimensionType>& input) { return batch_dim<2, DimensionType>(input.get_num_batch_entries(), - gko::transpose(input.get_common_size())); + transpose(input.get_common_size())); } diff --git a/include/ginkgo/core/base/batch_lin_op_helpers.hpp b/include/ginkgo/core/base/batch_lin_op_helpers.hpp index 6dd9297614a..5d1a2f8ed0d 100644 --- a/include/ginkgo/core/base/batch_lin_op_helpers.hpp +++ b/include/ginkgo/core/base/batch_lin_op_helpers.hpp @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index f7c8258121f..c5cc0040047 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -89,6 +89,7 @@ class BatchMultiVector friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class BatchMultiVector>; + friend class BatchMultiVector>; public: using BatchReadableFromMatrixData::read; @@ -102,13 +103,11 @@ class BatchMultiVector using value_type = ValueType; using index_type = int32; using unbatch_type = matrix::Dense; - using mat_data = gko::matrix_data; - using mat_data32 = gko::matrix_data; + using mat_data = matrix_data; + using mat_data64 = matrix_data; using absolute_type = remove_complex>; using complex_type = to_complex>; - using row_major_range = gko::range>; - /** * Creates a BatchMultiVector with the configuration of another * BatchMultiVector. @@ -118,8 +117,6 @@ class BatchMultiVector static std::unique_ptr create_with_config_of( ptr_param other); - friend class BatchMultiVector>; - void convert_to( BatchMultiVector>* result) const override; @@ -127,11 +124,11 @@ class BatchMultiVector void read(const std::vector& data) override; - void read(const std::vector& data) override; + void read(const std::vector& data) override; void write(std::vector& data) const override; - void write(std::vector& data) const override; + void write(std::vector& data) const override; /** * Unbatches the batched multi-vector and creates a std::vector of Dense @@ -175,13 +172,10 @@ class BatchMultiVector * * @return the pointer to the array of values */ - value_type* get_values(size_type batch_id = 0) noexcept - { - return values_.get_data(); - } + value_type* get_values() noexcept { return values_.get_data(); } /** - * @copydoc get_values(size_type) + * @copydoc get_values() * * @note This is the constant version of the function, which can be * significantly more memory efficient than the non-constant version, @@ -224,10 +218,10 @@ class BatchMultiVector /** * Returns the number of elements explicitly stored in the batch matrix, - * cumulative across all the batches. + * cumulative across all the batch entries. * * @return the number of elements explicitly stored in the vector, - * cumulative across all the batches + * cumulative across all the batch entries */ size_type get_num_stored_elements() const noexcept { @@ -235,7 +229,7 @@ class BatchMultiVector } /** - * Returns a single element for a particular batch. + * Returns a single element for a particular batch entry. 
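 * For example, at(1, 0, 2) refers to the element in row 0 and column 2 of
 * the second batch entry.
 *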
* * @param batch the batch index to be queried * @param row the row of the requested element @@ -267,24 +261,24 @@ class BatchMultiVector * However, it is less efficient than the two-parameter variant of this * method. * - * @param batch the batch index to be queried + * @param batch_id the batch entry index to be queried * @param idx a linear index of the requested element * * @note the method has to be called on the same Executor the vector is * stored at (e.g. trying to call this method on a GPU multi-vector * from the OMP results in a runtime error) */ - ValueType& at(size_type batch, size_type idx) noexcept + ValueType& at(size_type batch_id, size_type idx) noexcept { - return values_.get_data()[linearize_index(batch, idx)]; + return values_.get_data()[linearize_index(batch_id, idx)]; } /** * @copydoc BatchMultiVector::at(size_type, size_type, size_type) */ - ValueType at(size_type batch, size_type idx) const noexcept + ValueType at(size_type batch_id, size_type idx) const noexcept { - return values_.get_const_data()[linearize_index(batch, idx)]; + return values_.get_const_data()[linearize_index(batch_id, idx)]; } /** @@ -374,22 +368,11 @@ class BatchMultiVector void fill(ValueType value); private: - inline batch_dim<2> compute_batch_size( - const std::vector*>& matrices) - { - auto common_size = matrices[0]->get_size(); - for (int i = 1; i < matrices.size(); ++i) { - GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); - } - return batch_dim<2>{matrices.size(), common_size}; - } - inline size_type compute_num_elems(const batch_dim<2>& size) { return size.get_cumulative_offset(size.get_num_batch_entries()); } - protected: /** * Sets the size of the BatchMultiVector. @@ -403,14 +386,10 @@ class BatchMultiVector * size. * * @param exec Executor associated to the vector - * @param size size of the vector + * @param size size of the batch multi vector */ BatchMultiVector(std::shared_ptr exec, - const batch_dim<2>& size = batch_dim<2>{}) - : EnablePolymorphicObject>(exec), - batch_size_(size), - values_(exec, compute_num_elems(size)) - {} + const batch_dim<2>& size = batch_dim<2>{}); /** * Creates a BatchMultiVector from an already allocated (and @@ -446,24 +425,12 @@ class BatchMultiVector * * @note This is a utility function that can serve as a first step to port * to batched data-structures and solvers. Even if the matrices are in - * device memory, this method can have siginificant overhead, as new + * device memory, this method can have significant overhead, as new * allocations and deep copies are necessary and hence this constructor must * not be used in performance sensitive applications */ BatchMultiVector(std::shared_ptr exec, - const std::vector*>& matrices) - : EnablePolymorphicObject>(exec), - batch_size_{compute_batch_size(matrices)}, - values_(exec, compute_num_elems(batch_size_)) - { - for (size_type i = 0; i < this->get_num_batch_entries(); ++i) { - auto local_exec = matrices[i]->get_executor(); - exec->copy_from( - local_exec.get(), matrices[i]->get_num_stored_elements(), - matrices[i]->get_const_values(), - this->get_values() + this->get_size().get_cumulative_offset(i)); - } - } + const std::vector*>& matrices); /** * Creates a BatchMultiVector matrix by duplicating BatchMultiVector object @@ -474,26 +441,13 @@ class BatchMultiVector * * @note This is a utility function that can serve as a first step to port * to batched data-structures and solvers. 
Even if the matrices are in - * device memory, this method can have siginificant overhead, as new + * device memory, this method can have significant overhead, as new * allocations and deep copies are necessary and hence this constructor must * not be used in performance sensitive applications. */ BatchMultiVector(std::shared_ptr exec, size_type num_duplications, - const BatchMultiVector* input) - : BatchMultiVector( - exec, gko::batch_dim<2>( - input->get_num_batch_entries() * num_duplications, - input->get_common_size())) - { - size_type offset = 0; - for (size_type i = 0; i < num_duplications; ++i) { - exec->copy_from( - input->get_executor().get(), input->get_num_stored_elements(), - input->get_const_values(), this->get_values() + offset); - offset += input->get_num_stored_elements(); - } - } + const BatchMultiVector* input); /** * Creates a BatchMultiVector matrix by a duplicating a matrix::Dense object @@ -504,18 +458,7 @@ class BatchMultiVector */ BatchMultiVector(std::shared_ptr exec, size_type num_duplications, - const matrix::Dense* input) - : BatchMultiVector( - exec, gko::batch_dim<2>(num_duplications, input->get_size())) - { - size_type offset = 0; - for (size_type i = 0; i < num_duplications; ++i) { - exec->copy_from( - input->get_executor().get(), input->get_num_stored_elements(), - input->get_const_values(), this->get_values() + offset); - offset += input->get_num_stored_elements(); - } - } + const matrix::Dense* input); /** * Creates a BatchMultiVector with the same configuration as the diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index f6ae66d8249..f6d169bceaf 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -59,7 +59,6 @@ class BatchMultiVector : public ::testing::Test { using Mtx = gko::BatchMultiVector; using DenseMtx = gko::matrix::Dense; using ComplexMtx = gko::to_complex; - using RealMtx = gko::remove_complex; BatchMultiVector() : exec(gko::ReferenceExecutor::create()), mtx_0(gko::batch_initialize( @@ -124,7 +123,7 @@ class BatchMultiVector : public ::testing::Test { std::unique_ptr mtx_5; std::unique_ptr mtx_6; - std::ranlux48 rand_engine; + std::default_random_engine rand_engine; }; TYPED_TEST_SUITE(BatchMultiVector, gko::test::ValueTypes); @@ -165,7 +164,7 @@ TYPED_TEST(BatchMultiVector, ScalesDataWithScalar) } -TYPED_TEST(BatchMultiVector, ScalesDataWithStride) +TYPED_TEST(BatchMultiVector, ScalesDataWithMultipleScalars) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -261,15 +260,12 @@ TYPED_TEST(BatchMultiVector, ComputeDotFailsOnWrongInputSize) TYPED_TEST(BatchMultiVector, ComputeDotFailsOnWrongResultSize) { using Mtx = typename TestFixture::Mtx; + auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); - auto result2 = - Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); ASSERT_THROW(this->mtx_0->compute_dot(this->mtx_1.get(), result.get()), gko::DimensionMismatch); - ASSERT_THROW(this->mtx_0->compute_dot(this->mtx_1.get(), result2.get()), - gko::DimensionMismatch); } @@ -305,16 +301,12 @@ TYPED_TEST(BatchMultiVector, ComputeConjDotFailsOnWrongInputSize) TYPED_TEST(BatchMultiVector, ComputeConjDotFailsOnWrongResultSize) { using Mtx = typename TestFixture::Mtx; + auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); - auto result2 = - Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 2})); 
ASSERT_THROW(this->mtx_0->compute_conj_dot(this->mtx_1.get(), result.get()), gko::DimensionMismatch); - ASSERT_THROW( - this->mtx_0->compute_conj_dot(this->mtx_1.get(), result2.get()), - gko::DimensionMismatch); } diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index 015adbce798..631464a8d27 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -40,7 +40,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include @@ -60,11 +59,11 @@ class BatchMultiVector : public CommonTestFixture { BatchMultiVector() : rand_engine(15) {} template - std::unique_ptr gen_mtx(const size_t batch_size, int num_rows, - int num_cols) + std::unique_ptr gen_mtx(const size_t num_batch_entries, + int num_rows, int num_cols) { return gko::test::generate_uniform_batch_random_matrix( - batch_size, num_rows, num_cols, + num_batch_entries, num_rows, num_cols, std::uniform_int_distribution<>(num_cols, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, false, ref); } @@ -75,6 +74,8 @@ class BatchMultiVector : public CommonTestFixture { const int num_rows = 252; x = gen_mtx(batch_size, num_rows, num_vecs); y = gen_mtx(batch_size, num_rows, num_vecs); + c_x = gen_mtx(batch_size, num_rows, num_vecs); + c_y = gen_mtx(batch_size, num_rows, num_vecs); if (different_alpha) { alpha = gen_mtx(batch_size, 1, num_vecs); beta = gen_mtx(batch_size, 1, num_vecs); @@ -84,6 +85,8 @@ class BatchMultiVector : public CommonTestFixture { } dx = gko::clone(exec, x); dy = gko::clone(exec, y); + dc_x = gko::clone(exec, c_x); + dc_y = gko::clone(exec, c_y); dalpha = gko::clone(exec, alpha); dbeta = gko::clone(exec, beta); expected = Mtx::create( @@ -92,11 +95,12 @@ class BatchMultiVector : public CommonTestFixture { exec, gko::batch_dim<2>(batch_size, gko::dim<2>{1, num_vecs})); } - std::ranlux48 rand_engine; + std::default_random_engine rand_engine; const size_t batch_size = 11; std::unique_ptr x; std::unique_ptr c_x; + std::unique_ptr c_y; std::unique_ptr y; std::unique_ptr alpha; std::unique_ptr beta; @@ -105,6 +109,7 @@ class BatchMultiVector : public CommonTestFixture { std::unique_ptr dresult; std::unique_ptr dx; std::unique_ptr dc_x; + std::unique_ptr dc_y; std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; @@ -216,11 +221,16 @@ TEST_F(BatchMultiVector, ComputeDotIsEquivalentToRef) gko::batch_dim<2>(batch_size, gko::dim<2>{1, x->get_common_size()[1]}); auto dot_expected = Mtx::create(this->ref, dot_size); auto ddot = Mtx::create(this->exec, dot_size); + auto cdot_expected = ComplexMtx::create(this->ref, dot_size); + auto dc_dot = ComplexMtx::create(this->exec, dot_size); x->compute_dot(y.get(), dot_expected.get()); dx->compute_dot(dy.get(), ddot.get()); + c_x->compute_dot(c_y.get(), cdot_expected.get()); + dc_x->compute_dot(dc_y.get(), dc_dot.get()); GKO_ASSERT_BATCH_MTX_NEAR(dot_expected, ddot, 5 * r::value); + GKO_ASSERT_BATCH_MTX_NEAR(cdot_expected, dc_dot, 5 * r::value); } @@ -246,11 +256,16 @@ TEST_F(BatchMultiVector, ComputeConjDotIsEquivalentToRef) gko::batch_dim<2>(batch_size, gko::dim<2>{1, x->get_common_size()[1]}); auto dot_expected = Mtx::create(this->ref, dot_size); auto ddot = Mtx::create(this->exec, dot_size); + auto cdot_expected = ComplexMtx::create(this->ref, dot_size); + auto dc_dot = ComplexMtx::create(this->exec, dot_size); x->compute_conj_dot(y.get(), dot_expected.get()); dx->compute_conj_dot(dy.get(), ddot.get()); + c_x->compute_conj_dot(c_y.get(), 
cdot_expected.get()); + dc_x->compute_conj_dot(dc_y.get(), dc_dot.get()); GKO_ASSERT_BATCH_MTX_NEAR(dot_expected, ddot, 5 * r::value); + GKO_ASSERT_BATCH_MTX_NEAR(cdot_expected, dc_dot, 5 * r::value); } From 008f04fcc154413d2454903dd9c4aea1a2f1a9cd Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 31 Jul 2023 15:43:18 +0200 Subject: [PATCH 143/583] Rename: batch_entry -> batch_item --- ...batch_multi_vector_kernel_launcher.hpp.inc | 12 +-- .../base/batch_multi_vector_kernels.hpp.inc | 68 ++++++------ core/base/batch_multi_vector.cpp | 33 +++--- core/base/batch_struct.hpp | 25 +++-- core/test/base/batch_dim.cpp | 4 +- core/test/base/batch_multi_vector.cpp | 14 +-- cuda/base/batch_struct.hpp | 4 +- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 101 +++++++++--------- dpcpp/base/batch_multi_vector_kernels.hpp.inc | 24 ++--- dpcpp/base/batch_struct.hpp | 4 +- hip/base/batch_struct.hip.hpp | 4 +- include/ginkgo/core/base/batch_dim.hpp | 26 ++--- .../ginkgo/core/base/batch_multi_vector.hpp | 88 +++++++-------- omp/base/batch_multi_vector_kernels.cpp | 45 ++++---- reference/base/batch_multi_vector_kernels.cpp | 45 ++++---- .../base/batch_multi_vector_kernels.hpp.inc | 30 +++--- reference/base/batch_struct.hpp | 4 +- .../test/base/batch_multi_vector_kernels.cpp | 4 +- test/base/batch_multi_vector_kernels.cpp | 4 +- 19 files changed, 263 insertions(+), 276 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc index 60af1de45af..acd58b37327 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc @@ -36,7 +36,7 @@ void scale(std::shared_ptr exec, const BatchMultiVector* const alpha, BatchMultiVector* const x) { - const auto num_blocks = x->get_num_batch_entries(); + const auto num_blocks = x->get_num_batch_items(); const auto alpha_ub = get_batch_struct(alpha); const auto x_ub = get_batch_struct(x); if (alpha->get_common_size()[1] == 1) { @@ -58,7 +58,7 @@ void add_scaled(std::shared_ptr exec, const BatchMultiVector* const x, BatchMultiVector* const y) { - const auto num_blocks = x->get_num_batch_entries(); + const auto num_blocks = x->get_num_batch_items(); const size_type nrhs = x->get_common_size()[1]; const auto alpha_ub = get_batch_struct(alpha); const auto x_ub = get_batch_struct(x); @@ -84,7 +84,7 @@ void compute_dot(std::shared_ptr exec, const BatchMultiVector* y, BatchMultiVector* result) { - const auto num_blocks = x->get_num_batch_entries(); + const auto num_blocks = x->get_num_batch_items(); const auto num_rhs = x->get_common_size()[1]; const auto x_ub = get_batch_struct(x); const auto y_ub = get_batch_struct(y); @@ -104,7 +104,7 @@ void compute_conj_dot(std::shared_ptr exec, const BatchMultiVector* y, BatchMultiVector* result) { - const auto num_blocks = x->get_num_batch_entries(); + const auto num_blocks = x->get_num_batch_items(); const auto num_rhs = x->get_common_size()[1]; const auto x_ub = get_batch_struct(x); const auto y_ub = get_batch_struct(y); @@ -123,7 +123,7 @@ void compute_norm2(std::shared_ptr exec, const BatchMultiVector* const x, BatchMultiVector>* const result) { - const auto num_blocks = x->get_num_batch_entries(); + const auto num_blocks = x->get_num_batch_items(); const auto num_rhs = x->get_common_size()[1]; const auto x_ub = get_batch_struct(x); const auto res_ub = get_batch_struct(result); @@ -140,7 +140,7 @@ void copy(std::shared_ptr exec, const BatchMultiVector* x, 
BatchMultiVector* result) { - const auto num_blocks = x->get_num_batch_entries(); + const auto num_blocks = x->get_num_batch_items(); const auto result_ub = get_batch_struct(result); const auto x_ub = get_batch_struct(x); copy_kernel<<get_stream()>>>( diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index 5e63f451d19..cdb25d318f0 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -33,8 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __device__ __forceinline__ void scale( - const gko::batch_multi_vector::batch_entry& alpha, - const gko::batch_multi_vector::batch_entry& x, Mapping map) + const gko::batch_multi_vector::batch_item& alpha, + const gko::batch_multi_vector::batch_item& x, Mapping map) { const int max_li = x.num_rows * x.num_rhs; for (int li = threadIdx.x; li < max_li; li += blockDim.x) { @@ -57,10 +57,10 @@ __global__ __launch_bounds__( x, Mapping map) { - for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; + for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; batch_id += gridDim.x) { - const auto alpha_b = gko::batch::batch_entry(alpha, batch_id); - const auto x_b = gko::batch::batch_entry(x, batch_id); + const auto alpha_b = gko::batch::batch_item(alpha, batch_id); + const auto x_b = gko::batch::batch_item(x, batch_id); scale(alpha_b, x_b, map); } } @@ -68,9 +68,9 @@ __global__ __launch_bounds__( template __device__ __forceinline__ void add_scaled( - const gko::batch_multi_vector::batch_entry& alpha, - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y, Mapping map) + const gko::batch_multi_vector::batch_item& alpha, + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item& y, Mapping map) { const int max_li = x.num_rows * x.num_rhs; for (int li = threadIdx.x; li < max_li; li += blockDim.x) { @@ -98,11 +98,11 @@ __global__ __launch_bounds__( y, Mapping map) { - for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; + for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; batch_id += gridDim.x) { - const auto alpha_b = gko::batch::batch_entry(alpha, batch_id); - const auto x_b = gko::batch::batch_entry(x, batch_id); - const auto y_b = gko::batch::batch_entry(y, batch_id); + const auto alpha_b = gko::batch::batch_item(alpha, batch_id); + const auto x_b = gko::batch::batch_item(x, batch_id); + const auto y_b = gko::batch::batch_item(y, batch_id); add_scaled(alpha_b, x_b, y_b, map); } } @@ -110,10 +110,10 @@ __global__ __launch_bounds__( template __device__ __forceinline__ void gen_one_dot( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y, + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item& y, const int rhs_index, - const gko::batch_multi_vector::batch_entry& result, + const gko::batch_multi_vector::batch_item& result, Group subgroup, Mapping conj_map) { ValueType val = zero(); @@ -134,9 +134,9 @@ __device__ __forceinline__ void gen_one_dot( template __device__ __forceinline__ void compute_gen_dot_product( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y, - const gko::batch_multi_vector::batch_entry& result, + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item& y, + const 
gko::batch_multi_vector::batch_item& result, Mapping conj_map) { constexpr auto tile_size = config::warp_size; @@ -160,11 +160,11 @@ __global__ const gko::batch_multi_vector::uniform_batch result, Mapping map) { - for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; + for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; batch_id += gridDim.x) { - const auto x_b = gko::batch::batch_entry(x, batch_id); - const auto y_b = gko::batch::batch_entry(y, batch_id); - const auto r_b = gko::batch::batch_entry(result, batch_id); + const auto x_b = gko::batch::batch_item(x, batch_id); + const auto y_b = gko::batch::batch_item(y, batch_id); + const auto r_b = gko::batch::batch_item(result, batch_id); compute_gen_dot_product(x_b, y_b, r_b, map); } } @@ -172,9 +172,9 @@ __global__ template __device__ __forceinline__ void one_norm2( - const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_item& x, const int rhs_index, - const gko::batch_multi_vector::batch_entry>& + const gko::batch_multi_vector::batch_item>& result, Group subgroup) { @@ -202,8 +202,8 @@ __device__ __forceinline__ void one_norm2( */ template __device__ __forceinline__ void compute_norm2( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry>& + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item>& result) { constexpr auto tile_size = config::warp_size; @@ -234,10 +234,10 @@ __global__ __launch_bounds__( ValueType>> result) { - for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_entries; + for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; batch_id += gridDim.x) { - const auto x_b = gko::batch::batch_entry(x, batch_id); - const auto r_b = gko::batch::batch_entry(result, batch_id); + const auto x_b = gko::batch::batch_item(x, batch_id); + const auto r_b = gko::batch::batch_item(result, batch_id); compute_norm2(x_b, r_b); } } @@ -251,8 +251,8 @@ __global__ __launch_bounds__( */ template __device__ __forceinline__ void copy( - const gko::batch_multi_vector::batch_entry& in, - const gko::batch_multi_vector::batch_entry& out) + const gko::batch_multi_vector::batch_item& in, + const gko::batch_multi_vector::batch_item& out) { for (int iz = threadIdx.x; iz < in.num_rows * in.num_rhs; iz += blockDim.x) { @@ -269,10 +269,10 @@ __global__ const gko::batch_multi_vector::uniform_batch src, const gko::batch_multi_vector::uniform_batch dst) { - for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_entries; + for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_items; batch_id += gridDim.x) { - const auto dst_b = gko::batch::batch_entry(dst, batch_id); - const auto src_b = gko::batch::batch_entry(src, batch_id); + const auto dst_b = gko::batch::batch_item(dst, batch_id); + const auto src_b = gko::batch::batch_item(src, batch_id); copy(src_b, dst_b); } } diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index ac47260d82d..a843ee376c1 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -100,7 +100,7 @@ BatchMultiVector::BatchMultiVector( batch_size_{detail::compute_batch_size(matrices)}, values_(exec, compute_num_elems(batch_size_)) { - for (size_type i = 0; i < this->get_num_batch_entries(); ++i) { + for (size_type i = 0; i < this->get_num_batch_items(); ++i) { auto local_exec = matrices[i]->get_executor(); exec->copy_from( local_exec.get(), matrices[i]->get_num_stored_elements(), @@ -132,7 +132,7 @@ 
BatchMultiVector::BatchMultiVector( std::shared_ptr exec, size_type num_duplications, const BatchMultiVector* input) : BatchMultiVector( - exec, batch_dim<2>(input->get_num_batch_entries() * num_duplications, + exec, batch_dim<2>(input->get_num_batch_items() * num_duplications, input->get_common_size())) { size_type offset = 0; @@ -165,7 +165,7 @@ BatchMultiVector::unbatch() const using unbatch_type = matrix::Dense; auto exec = this->get_executor(); auto unbatch_mats = std::vector>{}; - for (size_type b = 0; b < this->get_num_batch_entries(); ++b) { + for (size_type b = 0; b < this->get_num_batch_items(); ++b) { auto mat = unbatch_type::create(exec, this->get_common_size()); exec->copy_from(exec.get(), mat->get_num_stored_elements(), this->get_const_values() + @@ -218,8 +218,7 @@ template void BatchMultiVector::scale( ptr_param> alpha) { - GKO_ASSERT_EQ(alpha->get_num_batch_entries(), - this->get_num_batch_entries()); + GKO_ASSERT_EQ(alpha->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQUAL_ROWS(alpha->get_common_size(), dim<2>(1, 1)); if (alpha->get_common_size()[1] != 1) { // different alpha for each column @@ -237,15 +236,14 @@ void BatchMultiVector::add_scaled( ptr_param> alpha, ptr_param> b) { - GKO_ASSERT_EQ(alpha->get_num_batch_entries(), - this->get_num_batch_entries()); + GKO_ASSERT_EQ(alpha->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQUAL_ROWS(alpha->get_common_size(), dim<2>(1, 1)); if (alpha->get_common_size()[1] != 1) { // different alpha for each column GKO_ASSERT_EQUAL_COLS(this->get_common_size(), alpha->get_common_size()); } - GKO_ASSERT_EQ(b->get_num_batch_entries(), this->get_num_batch_entries()); + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); auto exec = this->get_executor(); @@ -257,7 +255,7 @@ void BatchMultiVector::add_scaled( inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) { - return batch_dim<2>(sizes.get_num_batch_entries(), + return batch_dim<2>(sizes.get_num_batch_items(), dim<2>(1, sizes.get_common_size()[1])); } @@ -267,10 +265,9 @@ void BatchMultiVector::compute_conj_dot( ptr_param> b, ptr_param> result) const { - GKO_ASSERT_EQ(b->get_num_batch_entries(), this->get_num_batch_entries()); + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); - GKO_ASSERT_EQ(this->get_num_batch_entries(), - result->get_num_batch_entries()); + GKO_ASSERT_EQ(this->get_num_batch_items(), result->get_num_batch_items()); GKO_ASSERT_EQUAL_DIMENSIONS( result->get_common_size(), get_col_sizes(this->get_size()).get_common_size()); @@ -286,10 +283,9 @@ void BatchMultiVector::compute_dot( ptr_param> b, ptr_param> result) const { - GKO_ASSERT_EQ(b->get_num_batch_entries(), this->get_num_batch_entries()); + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); - GKO_ASSERT_EQ(this->get_num_batch_entries(), - result->get_num_batch_entries()); + GKO_ASSERT_EQ(this->get_num_batch_items(), result->get_num_batch_items()); GKO_ASSERT_EQUAL_DIMENSIONS( result->get_common_size(), get_col_sizes(this->get_size()).get_common_size()); @@ -304,8 +300,7 @@ template void BatchMultiVector::compute_norm2( ptr_param>> result) const { - GKO_ASSERT_EQ(this->get_num_batch_entries(), - result->get_num_batch_entries()); + GKO_ASSERT_EQ(this->get_num_batch_items(), 
result->get_num_batch_items()); GKO_ASSERT_EQUAL_DIMENSIONS( result->get_common_size(), get_col_sizes(this->get_size()).get_common_size()); @@ -375,8 +370,8 @@ void write_impl(const MatrixType* mtx, std::vector& data) { auto tmp = make_temporary_clone(mtx->get_executor()->get_master(), mtx); - data = std::vector(mtx->get_num_batch_entries()); - for (size_type b = 0; b < mtx->get_num_batch_entries(); ++b) { + data = std::vector(mtx->get_num_batch_items()); + for (size_type b = 0; b < mtx->get_num_batch_items(); ++b) { data[b] = {mtx->get_common_size(), {}}; for (size_type row = 0; row < data[b].size[0]; ++row) { for (size_type col = 0; col < data[b].size[1]; ++col) { diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index ea1b3ef3f3f..9549c4eaaee 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -47,7 +47,7 @@ namespace batch_multi_vector { * Encapsulates one matrix from a batch of multi-vectors. */ template -struct batch_entry { +struct batch_item { using value_type = ValueType; ValueType* values; int stride; @@ -62,10 +62,10 @@ struct batch_entry { template struct uniform_batch { using value_type = ValueType; - using entry_type = batch_entry; + using entry_type = batch_item; ValueType* values; - size_type num_batch_entries; + size_type num_batch_items; int stride; int num_rows; int num_rhs; @@ -84,8 +84,8 @@ namespace batch { template -GKO_ATTRIBUTES GKO_INLINE gko::batch_multi_vector::batch_entry -to_const(const gko::batch_multi_vector::batch_entry& b) +GKO_ATTRIBUTES GKO_INLINE gko::batch_multi_vector::batch_item +to_const(const gko::batch_multi_vector::batch_item& b) { return {b.values, b.stride, b.num_rows, b.num_rhs}; } @@ -96,8 +96,7 @@ GKO_ATTRIBUTES GKO_INLINE gko::batch_multi_vector::uniform_batch to_const(const gko::batch_multi_vector::uniform_batch& ub) { - return {ub.values, ub.num_batch_entries, ub.stride, ub.num_rows, - ub.num_rhs}; + return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_rhs}; } @@ -111,18 +110,18 @@ GKO_ATTRIBUTES GKO_INLINE * @param batch_idx The position of the desired object in the batch */ template -GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::batch_entry -batch_entry(const batch_multi_vector::uniform_batch& batch, - const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::batch_item batch_item( + const batch_multi_vector::uniform_batch& batch, + const size_type batch_idx) { return {batch.values + batch_idx * batch.stride * batch.num_rows, batch.stride, batch.num_rows, batch.num_rhs}; } template -GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::batch_entry -batch_entry(ValueType* const batch_values, const int stride, const int num_rows, - const int num_rhs, const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::batch_item batch_item( + ValueType* const batch_values, const int stride, const int num_rows, + const int num_rhs, const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, num_rhs}; diff --git a/core/test/base/batch_dim.cpp b/core/test/base/batch_dim.cpp index 71b954264c3..7914eb4d15e 100644 --- a/core/test/base/batch_dim.cpp +++ b/core/test/base/batch_dim.cpp @@ -43,7 +43,7 @@ TEST(BatchDim, ConstructsCorrectUniformObject) { gko::batch_dim<2> d{4, gko::dim<2>(5)}; - ASSERT_EQ(d.get_num_batch_entries(), 4); + ASSERT_EQ(d.get_num_batch_items(), 4); ASSERT_EQ(d.get_common_size(), gko::dim<2>(5)); } @@ -52,7 +52,7 @@ TEST(BatchDim, ConstructsNullObject) { gko::batch_dim<2> d{}; - 
ASSERT_EQ(d.get_num_batch_entries(), 0); + ASSERT_EQ(d.get_num_batch_items(), 0); ASSERT_EQ(d.get_common_size(), gko::dim<2>{}); } diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 486a8301cf6..844d4825a7a 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -64,7 +64,7 @@ class BatchMultiVector : public ::testing::Test { { ASSERT_NE(m->get_const_values(), nullptr); EXPECT_EQ(m->get_const_values()[0], value_type{-1.0}); - ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); @@ -82,7 +82,7 @@ class BatchMultiVector : public ::testing::Test { static void assert_empty(gko::BatchMultiVector* m) { - ASSERT_EQ(m->get_num_batch_entries(), 0); + ASSERT_EQ(m->get_num_batch_items(), 0); ASSERT_EQ(m->get_common_size(), gko::dim<2>{}); ASSERT_EQ(m->get_const_values(), nullptr); } @@ -114,7 +114,7 @@ TYPED_TEST(BatchMultiVector, CanGetValuesForEntry) { using value_type = typename TestFixture::value_type; - ASSERT_EQ(this->mtx->get_values_for_entry(1)[0], value_type{1.0}); + ASSERT_EQ(this->mtx->get_values_for_item(1)[0], value_type{1.0}); } @@ -165,7 +165,7 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedWithSize) auto m = gko::BatchMultiVector::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 4))); - ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 4)); } @@ -294,7 +294,7 @@ TYPED_TEST(BatchMultiVector, CanBeListConstructed) auto m = gko::batch_initialize>( {{1.0, 2.0}, {1.0, 3.0}}, this->exec); - ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); EXPECT_EQ(m->at(0, 0), value_type{1}); EXPECT_EQ(m->at(0, 1), value_type{2}); @@ -310,7 +310,7 @@ TYPED_TEST(BatchMultiVector, CanBeListConstructedByCopies) auto m = gko::batch_initialize>( 2, I({1.0, 2.0}), this->exec); - ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); @@ -351,7 +351,7 @@ TYPED_TEST(BatchMultiVector, CanBeFilledWithValue) m->fill(value_type(2.0)); - ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 1)); EXPECT_EQ(m->at(0, 0, 0), value_type{2.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 70bc42aecac..600cccc622b 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -65,7 +65,7 @@ template inline gko::batch_multi_vector::uniform_batch> get_batch_struct(const BatchMultiVector* const op) { - return {as_cuda_type(op->get_const_values()), op->get_num_batch_entries(), + return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; @@ -78,7 +78,7 @@ template inline gko::batch_multi_vector::uniform_batch> get_batch_struct(BatchMultiVector* const op) { - return {as_cuda_type(op->get_values()), op->get_num_batch_entries(), + return {as_cuda_type(op->get_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), 
static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 85870a91df7..3542fc5ebad 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -76,7 +76,7 @@ void scale(std::shared_ptr exec, const auto alpha_ub = get_batch_struct(alpha); const auto x_ub = get_batch_struct(x); - const auto num_batches = x_ub.num_batch_entries; + const auto num_batches = x_ub.num_batch_items; auto device = exec->get_queue()->get_device(); auto group_size = device.get_info(); @@ -91,8 +91,8 @@ void scale(std::shared_ptr exec, sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto alpha_b = batch::batch_entry(alpha_ub, group_id); - const auto x_b = batch::batch_entry(x_ub, group_id); + const auto alpha_b = batch::batch_item(alpha_ub, group_id); + const auto x_b = batch::batch_item(x_ub, group_id); scale_kernel(alpha_b, x_b, item_ct1, [](int col) { return 0; }); }); @@ -103,8 +103,8 @@ void scale(std::shared_ptr exec, sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto alpha_b = batch::batch_entry(alpha_ub, group_id); - const auto x_b = batch::batch_entry(x_ub, group_id); + const auto alpha_b = batch::batch_item(alpha_ub, group_id); + const auto x_b = batch::batch_item(x_ub, group_id); scale_kernel(alpha_b, x_b, item_ct1, [](int col) { return col; }); }); @@ -125,7 +125,7 @@ void add_scaled(std::shared_ptr exec, const size_type num_rows = x->get_common_size()[0]; const size_type num_cols = x->get_common_size()[1]; - const auto num_batches = x->get_num_batch_entries(); + const auto num_batches = x->get_num_batch_items(); auto device = exec->get_queue()->get_device(); auto group_size = device.get_info(); @@ -141,9 +141,9 @@ void add_scaled(std::shared_ptr exec, sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto alpha_b = batch::batch_entry(alpha_ub, group_id); - const auto x_b = batch::batch_entry(x_ub, group_id); - const auto y_b = batch::batch_entry(y_ub, group_id); + const auto alpha_b = batch::batch_item(alpha_ub, group_id); + const auto x_b = batch::batch_item(x_ub, group_id); + const auto y_b = batch::batch_item(y_ub, group_id); add_scaled_kernel(alpha_b, x_b, y_b, item_ct1, [](auto col) { return 0; }); }); @@ -154,9 +154,9 @@ void add_scaled(std::shared_ptr exec, sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto alpha_b = batch::batch_entry(alpha_ub, group_id); - const auto x_b = batch::batch_entry(x_ub, group_id); - const auto y_b = batch::batch_entry(y_ub, group_id); + const auto alpha_b = batch::batch_item(alpha_ub, group_id); + const auto x_b = batch::batch_item(x_ub, group_id); + const auto y_b = batch::batch_item(y_ub, group_id); add_scaled_kernel(alpha_b, x_b, y_b, item_ct1, [](auto col) { return col; }); }); @@ -178,7 +178,7 @@ void compute_dot(std::shared_ptr exec, const auto y_ub = get_batch_struct(y); const auto res_ub = get_batch_struct(result); - const auto num_batches = x_ub.num_batch_entries; + const auto num_batches = x_ub.num_batch_items; auto device = exec->get_queue()->get_device(); auto group_size = 
device.get_info(); @@ -189,18 +189,17 @@ void compute_dot(std::shared_ptr exec, // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_entry(x_ub, group_id); - const auto y_b = batch::batch_entry(y_ub, group_id); - const auto res_b = batch::batch_entry(res_ub, group_id); - compute_gen_dot_product_kernel( - x_b, y_b, res_b, item_ct1, - [](auto val) { return val; }); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::batch_item(x_ub, group_id); + const auto y_b = batch::batch_item(y_ub, group_id); + const auto res_b = batch::batch_item(res_ub, group_id); + compute_gen_dot_product_kernel(x_b, y_b, res_b, item_ct1, + [](auto val) { return val; }); + }); }); } @@ -218,7 +217,7 @@ void compute_conj_dot(std::shared_ptr exec, const auto y_ub = get_batch_struct(y); const auto res_ub = get_batch_struct(result); - const auto num_batches = x_ub.num_batch_entries; + const auto num_batches = x_ub.num_batch_items; auto device = exec->get_queue()->get_device(); auto group_size = device.get_info(); @@ -228,18 +227,18 @@ void compute_conj_dot(std::shared_ptr exec, exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_entry(x_ub, group_id); - const auto y_b = batch::batch_entry(y_ub, group_id); - const auto res_b = batch::batch_entry(res_ub, group_id); - compute_gen_dot_product_kernel( - x_b, y_b, res_b, item_ct1, - [](auto val) { return conj(val); }); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::batch_item(x_ub, group_id); + const auto y_b = batch::batch_item(y_ub, group_id); + const auto res_b = batch::batch_item(res_ub, group_id); + compute_gen_dot_product_kernel( + x_b, y_b, res_b, item_ct1, + [](auto val) { return conj(val); }); + }); }); } @@ -255,7 +254,7 @@ void compute_norm2(std::shared_ptr exec, const auto x_ub = get_batch_struct(x); const auto res_ub = get_batch_struct(result); - const auto num_batches = x_ub.num_batch_entries; + const auto num_batches = x_ub.num_batch_items; auto device = exec->get_queue()->get_device(); auto group_size = device.get_info(); @@ -265,15 +264,15 @@ void compute_norm2(std::shared_ptr exec, exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_entry(x_ub, group_id); - const auto res_b = batch::batch_entry(res_ub, group_id); - compute_norm2_kernel(x_b, res_b, item_ct1); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = 
item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::batch_item(x_ub, group_id); + const auto res_b = batch::batch_item(res_ub, group_id); + compute_norm2_kernel(x_b, res_b, item_ct1); + }); }); } @@ -289,7 +288,7 @@ void copy(std::shared_ptr exec, const auto x_ub = get_batch_struct(x); const auto result_ub = get_batch_struct(result); - const auto num_batches = x_ub.num_batch_entries; + const auto num_batches = x_ub.num_batch_items; auto device = exec->get_queue()->get_device(); auto group_size = device.get_info(); @@ -302,8 +301,8 @@ void copy(std::shared_ptr exec, sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_entry(x_ub, group_id); - const auto result_b = batch::batch_entry(result_ub, group_id); + const auto x_b = batch::batch_item(x_ub, group_id); + const auto result_b = batch::batch_item(result_ub, group_id); copy_kernel(x_b, result_b, item_ct1); }); }); diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index 7dfe13d0fda..c328a50465a 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -32,8 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __dpct_inline__ void scale_kernel( - const gko::batch_multi_vector::batch_entry& alpha, - const gko::batch_multi_vector::batch_entry& x, + const gko::batch_multi_vector::batch_item& alpha, + const gko::batch_multi_vector::batch_item& x, sycl::nd_item<3>& item_ct1, Mapping map) { const int max_li = x.num_rows * x.num_rhs; @@ -50,9 +50,9 @@ __dpct_inline__ void scale_kernel( template __dpct_inline__ void add_scaled_kernel( - const gko::batch_multi_vector::batch_entry& alpha, - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y, + const gko::batch_multi_vector::batch_item& alpha, + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item& y, sycl::nd_item<3>& item_ct1, Mapping map) { const int max_li = x.num_rows * x.num_rhs; @@ -69,9 +69,9 @@ __dpct_inline__ void add_scaled_kernel( template __dpct_inline__ void compute_gen_dot_product_kernel( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y, - const gko::batch_multi_vector::batch_entry& result, + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item& y, + const gko::batch_multi_vector::batch_item& result, sycl::nd_item<3>& item_ct1, Mapping conj_map) { constexpr auto tile_size = config::warp_size; @@ -104,8 +104,8 @@ __dpct_inline__ void compute_gen_dot_product_kernel( template __dpct_inline__ void compute_norm2_kernel( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry>& + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item>& result, sycl::nd_item<3>& item_ct1) { @@ -138,8 +138,8 @@ __dpct_inline__ void compute_norm2_kernel( template __dpct_inline__ void copy_kernel( - const gko::batch_multi_vector::batch_entry& in, - const gko::batch_multi_vector::batch_entry& out, + const gko::batch_multi_vector::batch_item& in, + const gko::batch_multi_vector::batch_item& out, sycl::nd_item<3>& item_ct1) { for (int iz = item_ct1.get_local_linear_id(); iz < in.num_rows * in.num_rhs; diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 
4f8d8aa0350..ff3a6a87ade 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -64,7 +64,7 @@ template inline gko::batch_multi_vector::uniform_batch get_batch_struct( const BatchMultiVector* const op) { - return {op->get_const_values(), op->get_num_batch_entries(), + return {op->get_const_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; @@ -78,7 +78,7 @@ template inline gko::batch_multi_vector::uniform_batch get_batch_struct( BatchMultiVector* const op) { - return {op->get_values(), op->get_num_batch_entries(), + return {op->get_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index 55f81f7eaff..1732505bc6f 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -65,7 +65,7 @@ template inline gko::batch_multi_vector::uniform_batch> get_batch_struct(const BatchMultiVector* const op) { - return {as_hip_type(op->get_const_values()), op->get_num_batch_entries(), + return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; @@ -78,7 +78,7 @@ template inline gko::batch_multi_vector::uniform_batch> get_batch_struct(BatchMultiVector* const op) { - return {as_hip_type(op->get_values()), op->get_num_batch_entries(), + return {as_hip_type(op->get_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; diff --git a/include/ginkgo/core/base/batch_dim.hpp b/include/ginkgo/core/base/batch_dim.hpp index 6d840f2ee86..3bda352fb9d 100644 --- a/include/ginkgo/core/base/batch_dim.hpp +++ b/include/ginkgo/core/base/batch_dim.hpp @@ -58,14 +58,14 @@ struct batch_dim { using dimension_type = DimensionType; /** - * Get the number of batch entries stored + * Get the number of batch items stored * - * @return num_batch_entries + * @return num_batch_items */ - size_type get_num_batch_entries() const { return num_batch_entries_; } + size_type get_num_batch_items() const { return num_batch_items_; } /** - * Get the common size of the batches + * Get the common size of the batch items * * @return common_size */ @@ -96,7 +96,7 @@ struct batch_dim { */ friend bool operator==(const batch_dim& x, const batch_dim& y) { - return x.num_batch_entries_ == y.num_batch_entries_ && + return x.num_batch_items_ == y.num_batch_items_ && x.common_size_ == y.common_size_; } @@ -120,29 +120,29 @@ struct batch_dim { /** - * The default empty constructor + * The default constructor */ batch_dim() : common_size_(dim{}), - num_batch_entries_(0) + num_batch_items_(0) {} /** * Creates a batch_dim object which stores a uniform size for all batch * entries. * - * @param num_batch_entries the number of batch entries to be stored - * @param common_size the common size of all the batch entries stored + * @param num_batch_items the number of batch items to be stored + * @param common_size the common size of all the batch items stored * * @note Use this constructor when uniform batches need to be stored. 
*/ - explicit batch_dim(const size_type num_batch_entries, + explicit batch_dim(const size_type num_batch_items, const dim& common_size) - : common_size_(common_size), num_batch_entries_(num_batch_entries) + : common_size_(common_size), num_batch_items_(num_batch_items) {} private: - size_type num_batch_entries_{}; + size_type num_batch_items_{}; dim common_size_{}; }; @@ -160,7 +160,7 @@ template inline batch_dim<2, DimensionType> transpose( const batch_dim<2, DimensionType>& input) { - return batch_dim<2, DimensionType>(input.get_num_batch_entries(), + return batch_dim<2, DimensionType>(input.get_num_batch_items(), transpose(input.get_common_size())); } diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index c5cc0040047..a502a701307 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -55,7 +55,7 @@ namespace gko { /** * BatchMultiVector stores multiple vectors in a batched fashion and is useful - * for batched operations. For example, if you want to store two batch entries + * for batched operations. For example, if you want to store two batch items * with multi-vectors of size (3 x 2) given below: * * [1 2 ; 3 4 @@ -66,10 +66,10 @@ namespace gko { * [1 2 1 2 1 2 3 4 3 4 3 4]. * * Access functions @at can help access individual - * entries if necessary. + * item if necessary. * - * The values of the batches are stored consecutively and in each batch, the - * vectors are stored in a row-major fashion. + * The values of the different batch items are stored consecutively and in each + * batch item, the multi-vectors are stored in a row-major fashion. * * @tparam ValueType precision of multi-vector elements * @@ -151,17 +151,17 @@ class BatchMultiVector batch_dim<2> get_size() const { return batch_size_; } /** - * Returns the number of batch entries. + * Returns the number of batch items. * - * @return the number of batch entries + * @return the number of batch items */ - size_type get_num_batch_entries() const + size_type get_num_batch_items() const { - return batch_size_.get_num_batch_entries(); + return batch_size_.get_num_batch_items(); } /** - * Returns the common size of the batch entries. + * Returns the common size of the batch items. * * @return the common size stored */ @@ -188,40 +188,40 @@ class BatchMultiVector /** * Returns a pointer to the array of values of the multi-vector for a - * specific batch entry. + * specific batch item. * - * @param batch_id the id of the batch entry. + * @param batch_id the id of the batch item. * * @return the pointer to the array of values */ - value_type* get_values_for_entry(size_type batch_id) noexcept + value_type* get_values_for_item(size_type batch_id) noexcept { - GKO_ASSERT(batch_id < this->get_num_batch_entries()); + GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_data() + this->get_size().get_cumulative_offset(batch_id); } /** - * @copydoc get_values_at_entry(size_type) + * @copydoc get_values_for_item(size_type) * * @note This is the constant version of the function, which can be * significantly more memory efficient than the non-constant version, * so always prefer this version. 
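The class documentation above states that the values of the different batch items are stored consecutively and that each item is kept in row-major order, with per-item offsets obtained from get_cumulative_offset(). For the uniform-size case that batch_dim<2> describes, the resulting flat index reduces to the small formula sketched below (illustrative helper name, not Ginkgo API):

    #include <cassert>
    #include <cstddef>

    // Flat index into a uniform batch of row-major multi-vectors: items are
    // stored back to back, and inside an item element (row, col) sits at
    // row * num_cols + col. Assumes every item has the same common size.
    std::size_t linearize_index(std::size_t batch_id, std::size_t row,
                                std::size_t col, std::size_t num_rows,
                                std::size_t num_cols)
    {
        return batch_id * num_rows * num_cols  // cumulative offset of the item
               + row * num_cols + col;         // row-major offset inside the item
    }

    int main()
    {
        // Two 3x2 items, as in the documentation example above: the element at
        // (batch 1, row 2, col 1) is the last of the 12 stored values.
        assert(linearize_index(1, 2, 1, 3, 2) == 11);
    }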
*/ - const value_type* get_const_values_for_entry( + const value_type* get_const_values_for_item( size_type batch_id) const noexcept { - GKO_ASSERT(batch_id < this->get_num_batch_entries()); + GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_size().get_cumulative_offset(batch_id); } /** * Returns the number of elements explicitly stored in the batch matrix, - * cumulative across all the batch entries. + * cumulative across all the batch items. * * @return the number of elements explicitly stored in the vector, - * cumulative across all the batch entries + * cumulative across all the batch items */ size_type get_num_stored_elements() const noexcept { @@ -229,9 +229,9 @@ class BatchMultiVector } /** - * Returns a single element for a particular batch entry. + * Returns a single element for a particular batch item. * - * @param batch the batch index to be queried + * @param batch_id the batch item index to be queried * @param row the row of the requested element * @param col the column of the requested element * @@ -239,29 +239,29 @@ class BatchMultiVector * stored at (e.g. trying to call this method on a GPU multi-vector * from the OMP results in a runtime error) */ - value_type& at(size_type batch, size_type row, size_type col) + value_type& at(size_type batch_id, size_type row, size_type col) { - GKO_ASSERT(batch < this->get_num_batch_entries()); - return values_.get_data()[linearize_index(batch, row, col)]; + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data()[linearize_index(batch_id, row, col)]; } /** * @copydoc BatchMultiVector::at(size_type, size_type, size_type) */ - value_type at(size_type batch, size_type row, size_type col) const + value_type at(size_type batch_id, size_type row, size_type col) const { - GKO_ASSERT(batch < this->get_num_batch_entries()); - return values_.get_const_data()[linearize_index(batch, row, col)]; + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data()[linearize_index(batch_id, row, col)]; } /** - * Returns a single element for a particular batch entry. + * Returns a single element for a particular batch item. * * Useful for iterating across all elements of the vector. * However, it is less efficient than the two-parameter variant of this * method. * - * @param batch_id the batch entry index to be queried + * @param batch_id the batch item index to be queried * @param idx a linear index of the requested element * * @note the method has to be called on the same Executor the vector is @@ -370,7 +370,7 @@ class BatchMultiVector private: inline size_type compute_num_elems(const batch_dim<2>& size) { - return size.get_cumulative_offset(size.get_num_batch_entries()); + return size.get_cumulative_offset(size.get_num_batch_items()); } protected: @@ -516,15 +516,15 @@ std::unique_ptr batch_initialize( std::shared_ptr exec, TArgs&&... create_args) { using batch_multi_vector = BatchMultiVector; - size_type num_batch_entries = vals.size(); - GKO_THROW_IF_INVALID(num_batch_entries > 0, "Input data is empty"); + size_type num_batch_items = vals.size(); + GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); auto vals_begin = begin(vals); size_type common_num_rows = vals_begin ? 
vals_begin->size() : 0; auto common_size = dim<2>(common_num_rows, 1); for (auto& val : vals) { GKO_ASSERT_EQ(common_num_rows, val.size()); } - auto b_size = batch_dim<2>(num_batch_entries, common_size); + auto b_size = batch_dim<2>(num_batch_items, common_size); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); size_type batch = 0; for (const auto& b : vals) { @@ -569,8 +569,8 @@ std::unique_ptr batch_initialize( std::shared_ptr exec, TArgs&&... create_args) { using batch_multi_vector = BatchMultiVector; - size_type num_batch_entries = vals.size(); - GKO_THROW_IF_INVALID(num_batch_entries > 0, "Input data is empty"); + size_type num_batch_items = vals.size(); + GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); auto vals_begin = begin(vals); size_type common_num_rows = vals_begin ? vals_begin->size() : 0; size_type common_num_cols = @@ -583,7 +583,7 @@ std::unique_ptr batch_initialize( GKO_ASSERT_EQUAL_DIMENSIONS(b_size, common_size); } - auto b_size = batch_dim<2>(num_batch_entries, common_size); + auto b_size = batch_dim<2>(num_batch_items, common_size); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); size_type batch = 0; for (const auto& b : vals) { @@ -634,11 +634,11 @@ std::unique_ptr batch_initialize( std::shared_ptr exec, TArgs&&... create_args) { using batch_multi_vector = BatchMultiVector; - size_type num_batch_entries = num_vectors; - GKO_THROW_IF_INVALID(num_batch_entries > 0 && vals.size() > 0, + size_type num_batch_items = num_vectors; + GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, "Input data is empty"); - auto b_size = batch_dim<2>(num_batch_entries, - dim<2>(begin(vals) ? vals.size() : 0, 1)); + auto b_size = + batch_dim<2>(num_batch_items, dim<2>(begin(vals) ? vals.size() : 0, 1)); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); for (size_type batch = 0; batch < num_vectors; batch++) { size_type idx = 0; @@ -665,7 +665,7 @@ std::unique_ptr batch_initialize( * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param num_batch_entries The number of times the input matrix is duplicated + * @param num_batch_items The number of times the input matrix is duplicated * @param vals values used to initialize each vector in the temp. batch * @param exec Executor associated to the vector * @param create_args additional arguments passed to Matrix::create, not @@ -677,19 +677,19 @@ std::unique_ptr batch_initialize( */ template std::unique_ptr batch_initialize( - const size_type num_batch_entries, + const size_type num_batch_items, std::initializer_list> vals, std::shared_ptr exec, TArgs&&... create_args) { using batch_multi_vector = BatchMultiVector; - GKO_THROW_IF_INVALID(num_batch_entries > 0 && vals.size() > 0, + GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, "Input data is empty"); auto common_size = dim<2>(begin(vals) ? vals.size() : 0, begin(vals) ? 
begin(vals)->size() : 0); - batch_dim<2> b_size(num_batch_entries, common_size); + batch_dim<2> b_size(num_batch_items, common_size); auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - for (size_type batch = 0; batch < num_batch_entries; batch++) { + for (size_type batch = 0; batch < num_batch_items; batch++) { size_type ridx = 0; for (const auto& row : vals) { size_type cidx = 0; diff --git a/omp/base/batch_multi_vector_kernels.cpp b/omp/base/batch_multi_vector_kernels.cpp index a88443f60b9..057efe5f05c 100644 --- a/omp/base/batch_multi_vector_kernels.cpp +++ b/omp/base/batch_multi_vector_kernels.cpp @@ -67,9 +67,9 @@ void scale(std::shared_ptr exec, const auto x_ub = host::get_batch_struct(x); const auto alpha_ub = host::get_batch_struct(alpha); #pragma omp parallel for - for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { - const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto alpha_b = gko::batch::batch_item(alpha_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); scale_kernel(alpha_b, x_b); } } @@ -88,10 +88,10 @@ void add_scaled(std::shared_ptr exec, const auto y_ub = host::get_batch_struct(y); const auto alpha_ub = host::get_batch_struct(alpha); #pragma omp parallel for - for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { - const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); + for (size_type batch = 0; batch < y->get_num_batch_items(); ++batch) { + const auto alpha_b = gko::batch::batch_item(alpha_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto y_b = gko::batch::batch_item(y_ub, batch); add_scaled_kernel(alpha_b, x_b, y_b); } } @@ -110,11 +110,10 @@ void compute_dot(std::shared_ptr exec, const auto y_ub = host::get_batch_struct(y); const auto res_ub = host::get_batch_struct(result); #pragma omp parallel for - for (size_type batch = 0; batch < result->get_num_batch_entries(); - ++batch) { - const auto res_b = gko::batch::batch_entry(res_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); + for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { + const auto res_b = gko::batch::batch_item(res_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto y_b = gko::batch::batch_item(y_ub, batch); compute_dot_product_kernel(x_b, y_b, res_b); } } @@ -133,11 +132,10 @@ void compute_conj_dot(std::shared_ptr exec, const auto y_ub = host::get_batch_struct(y); const auto res_ub = host::get_batch_struct(result); #pragma omp parallel for - for (size_type batch = 0; batch < result->get_num_batch_entries(); - ++batch) { - const auto res_b = gko::batch::batch_entry(res_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); + for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { + const auto res_b = gko::batch::batch_item(res_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto y_b = gko::batch::batch_item(y_ub, batch); compute_conj_dot_product_kernel(x_b, y_b, res_b); } } @@ -154,10 +152,9 @@ void compute_norm2(std::shared_ptr exec, const auto x_ub = host::get_batch_struct(x); const auto 
res_ub = host::get_batch_struct(result); #pragma omp parallel for - for (size_type batch = 0; batch < result->get_num_batch_entries(); - ++batch) { - const auto res_b = gko::batch::batch_entry(res_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); + for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { + const auto res_b = gko::batch::batch_item(res_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); compute_norm2_kernel(x_b, res_b); } } @@ -174,9 +171,9 @@ void copy(std::shared_ptr exec, const auto x_ub = host::get_batch_struct(x); const auto result_ub = host::get_batch_struct(result); #pragma omp parallel for - for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { - const auto result_b = gko::batch::batch_entry(result_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto result_b = gko::batch::batch_item(result_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); copy_kernel(x_b, result_b); } } diff --git a/reference/base/batch_multi_vector_kernels.cpp b/reference/base/batch_multi_vector_kernels.cpp index 967dddb108a..b5cdb03d214 100644 --- a/reference/base/batch_multi_vector_kernels.cpp +++ b/reference/base/batch_multi_vector_kernels.cpp @@ -66,9 +66,9 @@ void scale(std::shared_ptr exec, { const auto x_ub = host::get_batch_struct(x); const auto alpha_ub = host::get_batch_struct(alpha); - for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { - const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto alpha_b = gko::batch::batch_item(alpha_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); scale_kernel(alpha_b, x_b); } } @@ -86,10 +86,10 @@ void add_scaled(std::shared_ptr exec, const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); const auto alpha_ub = host::get_batch_struct(alpha); - for (size_type batch = 0; batch < y->get_num_batch_entries(); ++batch) { - const auto alpha_b = gko::batch::batch_entry(alpha_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); + for (size_type batch = 0; batch < y->get_num_batch_items(); ++batch) { + const auto alpha_b = gko::batch::batch_item(alpha_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto y_b = gko::batch::batch_item(y_ub, batch); add_scaled_kernel(alpha_b, x_b, y_b); } } @@ -107,11 +107,10 @@ void compute_dot(std::shared_ptr exec, const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); const auto res_ub = host::get_batch_struct(result); - for (size_type batch = 0; batch < result->get_num_batch_entries(); - ++batch) { - const auto res_b = gko::batch::batch_entry(res_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); + for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { + const auto res_b = gko::batch::batch_item(res_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto y_b = gko::batch::batch_item(y_ub, batch); compute_dot_product_kernel(x_b, y_b, res_b); } } @@ -129,11 +128,10 @@ void compute_conj_dot(std::shared_ptr exec, const auto x_ub = host::get_batch_struct(x); const auto y_ub 
= host::get_batch_struct(y); const auto res_ub = host::get_batch_struct(result); - for (size_type batch = 0; batch < result->get_num_batch_entries(); - ++batch) { - const auto res_b = gko::batch::batch_entry(res_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); - const auto y_b = gko::batch::batch_entry(y_ub, batch); + for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { + const auto res_b = gko::batch::batch_item(res_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto y_b = gko::batch::batch_item(y_ub, batch); compute_conj_dot_product_kernel(x_b, y_b, res_b); } } @@ -149,10 +147,9 @@ void compute_norm2(std::shared_ptr exec, { const auto x_ub = host::get_batch_struct(x); const auto res_ub = host::get_batch_struct(result); - for (size_type batch = 0; batch < result->get_num_batch_entries(); - ++batch) { - const auto res_b = gko::batch::batch_entry(res_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); + for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { + const auto res_b = gko::batch::batch_item(res_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); compute_norm2_kernel(x_b, res_b); } } @@ -168,9 +165,9 @@ void copy(std::shared_ptr exec, { const auto x_ub = host::get_batch_struct(x); const auto result_ub = host::get_batch_struct(result); - for (size_type batch = 0; batch < x->get_num_batch_entries(); ++batch) { - const auto result_b = gko::batch::batch_entry(result_ub, batch); - const auto x_b = gko::batch::batch_entry(x_ub, batch); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto result_b = gko::batch::batch_item(result_ub, batch); + const auto x_b = gko::batch::batch_item(x_ub, batch); copy_kernel(x_b, result_b); } } diff --git a/reference/base/batch_multi_vector_kernels.hpp.inc b/reference/base/batch_multi_vector_kernels.hpp.inc index 6e3b195e175..a6935866f56 100644 --- a/reference/base/batch_multi_vector_kernels.hpp.inc +++ b/reference/base/batch_multi_vector_kernels.hpp.inc @@ -32,8 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
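The OpenMP and reference backends above share one dispatch structure: a loop over get_num_batch_items(), extracting one item view per iteration and handing it to an item-level kernel; the OpenMP backend merely adds "#pragma omp parallel for" on that loop. A standalone sketch of the pattern, with illustrative names rather than Ginkgo code:

    #include <cstddef>
    #include <vector>

    // Loop over batch items, form a pointer to each item's contiguous values,
    // and run an item kernel on it. The pragma is ignored without -fopenmp.
    template <typename ItemKernel>
    void run_over_items(std::vector<double>& values, std::size_t num_items,
                        std::size_t item_size, ItemKernel kernel)
    {
    #pragma omp parallel for
        for (std::ptrdiff_t batch = 0;
             batch < static_cast<std::ptrdiff_t>(num_items); ++batch) {
            kernel(values.data() + batch * item_size, item_size);
        }
    }

    int main()
    {
        std::vector<double> values(3 * 4, 1.0);  // 3 items of 4 values each
        run_over_items(values, 3, 4, [](double* item, std::size_t n) {
            for (std::size_t i = 0; i < n; ++i) {
                item[i] *= 2.0;  // e.g. a scale kernel with alpha == 2
            }
        });
        return values[11] == 2.0 ? 0 : 1;
    }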
template inline void scale_kernel( - const gko::batch_multi_vector::batch_entry& alpha, - const gko::batch_multi_vector::batch_entry& x) + const gko::batch_multi_vector::batch_item& alpha, + const gko::batch_multi_vector::batch_item& x) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -53,9 +53,9 @@ inline void scale_kernel( template inline void add_scaled_kernel( - const gko::batch_multi_vector::batch_entry& alpha, - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y) + const gko::batch_multi_vector::batch_item& alpha, + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item& y) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -77,9 +77,9 @@ inline void add_scaled_kernel( template inline void compute_dot_product_kernel( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y, - const gko::batch_multi_vector::batch_entry& result) + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item& y, + const gko::batch_multi_vector::batch_item& result) { for (int c = 0; c < result.num_rhs; c++) { result.values[c] = gko::zero(); @@ -96,9 +96,9 @@ inline void compute_dot_product_kernel( template inline void compute_conj_dot_product_kernel( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry& y, - const gko::batch_multi_vector::batch_entry& result) + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item& y, + const gko::batch_multi_vector::batch_item& result) { for (int c = 0; c < result.num_rhs; c++) { result.values[c] = gko::zero(); @@ -115,8 +115,8 @@ inline void compute_conj_dot_product_kernel( template inline void compute_norm2_kernel( - const gko::batch_multi_vector::batch_entry& x, - const gko::batch_multi_vector::batch_entry>& + const gko::batch_multi_vector::batch_item& x, + const gko::batch_multi_vector::batch_item>& result) { for (int j = 0; j < x.num_rhs; ++j) { @@ -141,8 +141,8 @@ inline void compute_norm2_kernel( */ template inline void copy_kernel( - const gko::batch_multi_vector::batch_entry& in, - const gko::batch_multi_vector::batch_entry& out) + const gko::batch_multi_vector::batch_item& in, + const gko::batch_multi_vector::batch_item& out) { for (int iz = 0; iz < in.num_rows * in.num_rhs; iz++) { const int i = iz / in.num_rhs; diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index 41262be1d48..21ff280baba 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -66,7 +66,7 @@ template inline gko::batch_multi_vector::uniform_batch get_batch_struct( const BatchMultiVector* const op) { - return {op->get_const_values(), op->get_num_batch_entries(), + return {op->get_const_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; @@ -80,7 +80,7 @@ template inline gko::batch_multi_vector::uniform_batch get_batch_struct( BatchMultiVector* const op) { - return {op->get_values(), op->get_num_batch_entries(), + return {op->get_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1])}; diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index f6d169bceaf..62567cc91ee 100644 --- 
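The renamed reference kernels above (scale_kernel, add_scaled_kernel, compute_dot_product_kernel, compute_norm2_kernel) each act on a single batch item and produce one result per right-hand side, i.e. column-wise reductions over the row-major item. A standalone sketch of those per-item semantics on plain arrays (illustrative only, not Ginkgo code):

    #include <cassert>
    #include <cmath>
    #include <vector>

    // One dot product per right-hand side of a row-major (num_rows x num_rhs) item.
    void dot_item(const double* x, const double* y, double* result, int num_rows,
                  int num_rhs)
    {
        for (int c = 0; c < num_rhs; ++c) {
            result[c] = 0.0;
            for (int r = 0; r < num_rows; ++r) {
                result[c] += x[r * num_rhs + c] * y[r * num_rhs + c];
            }
        }
    }

    // One 2-norm per right-hand side of the same item layout.
    void norm2_item(const double* x, double* result, int num_rows, int num_rhs)
    {
        for (int c = 0; c < num_rhs; ++c) {
            double acc = 0.0;
            for (int r = 0; r < num_rows; ++r) {
                acc += x[r * num_rhs + c] * x[r * num_rhs + c];
            }
            result[c] = std::sqrt(acc);
        }
    }

    int main()
    {
        std::vector<double> x{3.0, 1.0, 4.0, 1.0};  // 2 rows, 2 columns
        std::vector<double> res(2);
        norm2_item(x.data(), res.data(), 2, 2);
        assert(std::abs(res[0] - 5.0) < 1e-14);  // column 0: sqrt(9 + 16)
        dot_item(x.data(), x.data(), res.data(), 2, 2);
        assert(res[1] == 2.0);                   // column 1: 1*1 + 1*1
    }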
a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -398,7 +398,7 @@ TYPED_TEST(BatchMultiVector, ConvertsEmptyToPrecision) empty->convert_to(res.get()); - ASSERT_FALSE(res->get_num_batch_entries()); + ASSERT_FALSE(res->get_num_batch_items()); } @@ -413,5 +413,5 @@ TYPED_TEST(BatchMultiVector, MovesEmptyToPrecision) empty->move_to(res.get()); - ASSERT_FALSE(res->get_num_batch_entries()); + ASSERT_FALSE(res->get_num_batch_items()); } diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index 631464a8d27..a55ff0792ad 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -59,11 +59,11 @@ class BatchMultiVector : public CommonTestFixture { BatchMultiVector() : rand_engine(15) {} template - std::unique_ptr gen_mtx(const size_t num_batch_entries, + std::unique_ptr gen_mtx(const size_t num_batch_items, int num_rows, int num_cols) { return gko::test::generate_uniform_batch_random_matrix( - num_batch_entries, num_rows, num_cols, + num_batch_items, num_rows, num_cols, std::uniform_int_distribution<>(num_cols, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, false, ref); } From 18697f44ac497a10c1c277235ed1f538b3f96498 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 31 Jul 2023 17:14:17 +0200 Subject: [PATCH 144/583] Use batch:: namespace,rename to batch::MultiVector --- ...batch_multi_vector_kernel_launcher.hpp.inc | 30 ++-- .../base/batch_multi_vector_kernels.hpp.inc | 62 +++---- core/base/batch_multi_vector.cpp | 121 ++++++------- core/base/batch_multi_vector_kernels.hpp | 30 ++-- core/base/batch_struct.hpp | 23 ++- core/test/base/batch_multi_vector.cpp | 93 +++++----- core/test/utils/assertions.hpp | 2 +- cuda/base/batch_multi_vector_kernels.cu | 2 +- cuda/base/batch_struct.hpp | 8 +- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 34 ++-- dpcpp/base/batch_multi_vector_kernels.hpp.inc | 24 +-- dpcpp/base/batch_struct.hpp | 8 +- hip/base/batch_multi_vector_kernels.hip.cpp | 2 +- hip/base/batch_struct.hip.hpp | 8 +- .../ginkgo/core/base/batch_multi_vector.hpp | 163 +++++++++--------- omp/base/batch_multi_vector_kernels.cpp | 34 ++-- reference/base/batch_multi_vector_kernels.cpp | 64 +++---- .../base/batch_multi_vector_kernels.hpp.inc | 30 ++-- reference/base/batch_struct.hpp | 8 +- .../test/base/batch_multi_vector_kernels.cpp | 112 ++++++------ test/base/batch_multi_vector_kernels.cpp | 42 ++--- test/test_install/test_install.cpp | 2 +- 22 files changed, 452 insertions(+), 450 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc index acd58b37327..6c0c5363baa 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc @@ -33,8 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template void scale(std::shared_ptr exec, - const BatchMultiVector* const alpha, - BatchMultiVector* const x) + const batch::MultiVector* const alpha, + batch::MultiVector* const x) { const auto num_blocks = x->get_num_batch_items(); const auto alpha_ub = get_batch_struct(alpha); @@ -54,9 +54,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void add_scaled(std::shared_ptr exec, - const BatchMultiVector* const alpha, - const BatchMultiVector* const x, - BatchMultiVector* const y) + const batch::MultiVector* const alpha, + const batch::MultiVector* const x, + batch::MultiVector* const y) { const auto num_blocks = x->get_num_batch_items(); const size_type nrhs = x->get_common_size()[1]; @@ -80,9 +80,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_dot(std::shared_ptr exec, - const BatchMultiVector* x, - const BatchMultiVector* y, - BatchMultiVector* result) + const batch::MultiVector* x, + const batch::MultiVector* y, + batch::MultiVector* result) { const auto num_blocks = x->get_num_batch_items(); const auto num_rhs = x->get_common_size()[1]; @@ -100,9 +100,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_conj_dot(std::shared_ptr exec, - const BatchMultiVector* x, - const BatchMultiVector* y, - BatchMultiVector* result) + const batch::MultiVector* x, + const batch::MultiVector* y, + batch::MultiVector* result) { const auto num_blocks = x->get_num_batch_items(); const auto num_rhs = x->get_common_size()[1]; @@ -120,8 +120,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_norm2(std::shared_ptr exec, - const BatchMultiVector* const x, - BatchMultiVector>* const result) + const batch::MultiVector* const x, + batch::MultiVector>* const result) { const auto num_blocks = x->get_num_batch_items(); const auto num_rhs = x->get_common_size()[1]; @@ -137,8 +137,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void copy(std::shared_ptr exec, - const BatchMultiVector* x, - BatchMultiVector* result) + const batch::MultiVector* x, + batch::MultiVector* result) { const auto num_blocks = x->get_num_batch_items(); const auto result_ub = get_batch_struct(result); diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index cdb25d318f0..df64e5cfe85 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -33,8 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template __device__ __forceinline__ void scale( - const gko::batch_multi_vector::batch_item& alpha, - const gko::batch_multi_vector::batch_item& x, Mapping map) + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::multi_vector::batch_item& x, Mapping map) { const int max_li = x.num_rows * x.num_rhs; for (int li = threadIdx.x; li < max_li; li += blockDim.x) { @@ -49,10 +49,10 @@ __device__ __forceinline__ void scale( template __global__ __launch_bounds__( default_block_size, - sm_oversubscription) void scale_kernel(const gko::batch_multi_vector:: + sm_oversubscription) void scale_kernel(const gko::batch::multi_vector:: uniform_batch alpha, - const gko::batch_multi_vector:: + const gko::batch::multi_vector:: uniform_batch x, Mapping map) @@ -68,9 +68,9 @@ __global__ __launch_bounds__( template __device__ __forceinline__ void add_scaled( - const gko::batch_multi_vector::batch_item& alpha, - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item& y, Mapping map) + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item& y, Mapping map) { const int max_li = x.num_rows * x.num_rhs; for (int li = threadIdx.x; li < max_li; li += blockDim.x) { @@ -85,15 +85,15 @@ __device__ __forceinline__ void add_scaled( template __global__ __launch_bounds__( default_block_size, - sm_oversubscription) void add_scaled_kernel(const gko::batch_multi_vector:: + sm_oversubscription) void add_scaled_kernel(const gko::batch::multi_vector:: uniform_batch< const ValueType> alpha, - const gko::batch_multi_vector:: + const gko::batch::multi_vector:: uniform_batch< const ValueType> x, - const gko::batch_multi_vector:: + const gko::batch::multi_vector:: uniform_batch y, Mapping map) @@ -110,10 +110,10 @@ __global__ __launch_bounds__( template __device__ __forceinline__ void gen_one_dot( - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item& y, + const gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item& y, const int rhs_index, - const gko::batch_multi_vector::batch_item& result, + const gko::batch::multi_vector::batch_item& result, Group subgroup, Mapping conj_map) { ValueType val = zero(); @@ -134,9 +134,9 @@ __device__ __forceinline__ void gen_one_dot( template __device__ __forceinline__ void compute_gen_dot_product( - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item& y, - const gko::batch_multi_vector::batch_item& result, + const gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item& y, + const gko::batch::multi_vector::batch_item& result, Mapping conj_map) { constexpr auto tile_size = config::warp_size; @@ -155,9 +155,9 @@ __device__ __forceinline__ void compute_gen_dot_product( template __global__ __launch_bounds__(default_block_size, sm_oversubscription) void compute_gen_dot_product_kernel( - const gko::batch_multi_vector::uniform_batch x, - const gko::batch_multi_vector::uniform_batch y, - const gko::batch_multi_vector::uniform_batch result, + const gko::batch::multi_vector::uniform_batch x, + const gko::batch::multi_vector::uniform_batch y, + const gko::batch::multi_vector::uniform_batch result, Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; @@ -172,9 +172,9 @@ __global__ template __device__ __forceinline__ void one_norm2( - const gko::batch_multi_vector::batch_item& x, + const 
gko::batch::multi_vector::batch_item& x, const int rhs_index, - const gko::batch_multi_vector::batch_item>& + const gko::batch::multi_vector::batch_item>& result, Group subgroup) { @@ -202,8 +202,8 @@ __device__ __forceinline__ void one_norm2( */ template __device__ __forceinline__ void compute_norm2( - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item>& + const gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item>& result) { constexpr auto tile_size = config::warp_size; @@ -222,13 +222,13 @@ __device__ __forceinline__ void compute_norm2( template __global__ __launch_bounds__( default_block_size, - sm_oversubscription) void compute_norm2_kernel(const gko:: - batch_multi_vector:: + sm_oversubscription) void compute_norm2_kernel(const gko::batch:: + multi_vector:: uniform_batch< const ValueType> x, - const gko:: - batch_multi_vector:: + const gko::batch:: + multi_vector:: uniform_batch< remove_complex< ValueType>> @@ -251,8 +251,8 @@ __global__ __launch_bounds__( */ template __device__ __forceinline__ void copy( - const gko::batch_multi_vector::batch_item& in, - const gko::batch_multi_vector::batch_item& out) + const gko::batch::multi_vector::batch_item& in, + const gko::batch::multi_vector::batch_item& out) { for (int iz = threadIdx.x; iz < in.num_rows * in.num_rhs; iz += blockDim.x) { @@ -266,8 +266,8 @@ __device__ __forceinline__ void copy( template __global__ __launch_bounds__(default_block_size, sm_oversubscription) void copy_kernel( - const gko::batch_multi_vector::uniform_batch src, - const gko::batch_multi_vector::uniform_batch dst) + const gko::batch::multi_vector::uniform_batch src, + const gko::batch::multi_vector::uniform_batch dst) { for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_items; batch_id += gridDim.x) { diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index a843ee376c1..f17f1479f5f 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -50,7 +50,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { -namespace batch_multi_vector { +namespace batch { +namespace multi_vector { namespace { @@ -63,7 +64,8 @@ GKO_REGISTER_OPERATION(copy, batch_multi_vector::copy); } // namespace -} // namespace batch_multi_vector +} // namespace multi_vector + namespace detail { @@ -84,19 +86,19 @@ batch_dim<2> compute_batch_size( template -BatchMultiVector::BatchMultiVector( - std::shared_ptr exec, const batch_dim<2>& size) - : EnablePolymorphicObject>(exec), +MultiVector::MultiVector(std::shared_ptr exec, + const batch_dim<2>& size) + : EnablePolymorphicObject>(exec), batch_size_(size), values_(exec, compute_num_elems(size)) {} template -BatchMultiVector::BatchMultiVector( +MultiVector::MultiVector( std::shared_ptr exec, const std::vector*>& matrices) - : EnablePolymorphicObject>(exec), + : EnablePolymorphicObject>(exec), batch_size_{detail::compute_batch_size(matrices)}, values_(exec, compute_num_elems(batch_size_)) { @@ -111,11 +113,11 @@ BatchMultiVector::BatchMultiVector( template -BatchMultiVector::BatchMultiVector( - std::shared_ptr exec, size_type num_duplications, - const matrix::Dense* input) - : BatchMultiVector( - exec, batch_dim<2>(num_duplications, input->get_size())) +MultiVector::MultiVector(std::shared_ptr exec, + size_type num_duplications, + const matrix::Dense* input) + : MultiVector(exec, + batch_dim<2>(num_duplications, input->get_size())) { size_type offset = 0; for (size_type i = 0; i < num_duplications; ++i) { @@ -128,10 +130,10 @@ BatchMultiVector::BatchMultiVector( template -BatchMultiVector::BatchMultiVector( - std::shared_ptr exec, size_type num_duplications, - const BatchMultiVector* input) - : BatchMultiVector( +MultiVector::MultiVector(std::shared_ptr exec, + size_type num_duplications, + const MultiVector* input) + : MultiVector( exec, batch_dim<2>(input->get_num_batch_items() * num_duplications, input->get_common_size())) { @@ -146,9 +148,9 @@ BatchMultiVector::BatchMultiVector( template -std::unique_ptr> -BatchMultiVector::create_with_config_of( - ptr_param other) +std::unique_ptr> +MultiVector::create_with_config_of( + ptr_param other) { // De-referencing `other` before calling the functions (instead of // using operator `->`) is currently required to be compatible with @@ -160,7 +162,7 @@ BatchMultiVector::create_with_config_of( template std::vector>> -BatchMultiVector::unbatch() const +MultiVector::unbatch() const { using unbatch_type = matrix::Dense; auto exec = this->get_executor(); @@ -178,20 +180,20 @@ BatchMultiVector::unbatch() const template -std::unique_ptr> -BatchMultiVector::create_const( +std::unique_ptr> +MultiVector::create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - detail::const_array_view&& values) + gko::detail::const_array_view&& values) { // cast const-ness away, but return a const object afterwards, // so we can ensure that no modifications take place. 
- return std::unique_ptr(new BatchMultiVector{ - exec, sizes, detail::array_const_cast(std::move(values))}); + return std::unique_ptr(new MultiVector{ + exec, sizes, gko::detail::array_const_cast(std::move(values))}); } template -void BatchMultiVector::fill(ValueType value) +void MultiVector::fill(ValueType value) { GKO_ASSERT(this->values_.get_num_elems() > 0); this->values_.fill(value); @@ -199,24 +201,24 @@ void BatchMultiVector::fill(ValueType value) template -void BatchMultiVector::set_size(const batch_dim<2>& value) noexcept +void MultiVector::set_size(const batch_dim<2>& value) noexcept { batch_size_ = value; } template -std::unique_ptr> -BatchMultiVector::create_with_same_config() const +std::unique_ptr> +MultiVector::create_with_same_config() const { - return BatchMultiVector::create(this->get_executor(), - this->get_size()); + return MultiVector::create(this->get_executor(), + this->get_size()); } template -void BatchMultiVector::scale( - ptr_param> alpha) +void MultiVector::scale( + ptr_param> alpha) { GKO_ASSERT_EQ(alpha->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQUAL_ROWS(alpha->get_common_size(), dim<2>(1, 1)); @@ -226,15 +228,15 @@ void BatchMultiVector::scale( alpha->get_common_size()); } auto exec = this->get_executor(); - exec->run(batch_multi_vector::make_scale( - make_temporary_clone(exec, alpha).get(), this)); + exec->run(multi_vector::make_scale(make_temporary_clone(exec, alpha).get(), + this)); } template -void BatchMultiVector::add_scaled( - ptr_param> alpha, - ptr_param> b) +void MultiVector::add_scaled( + ptr_param> alpha, + ptr_param> b) { GKO_ASSERT_EQ(alpha->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQUAL_ROWS(alpha->get_common_size(), dim<2>(1, 1)); @@ -247,7 +249,7 @@ void BatchMultiVector::add_scaled( GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); auto exec = this->get_executor(); - exec->run(batch_multi_vector::make_add_scaled( + exec->run(multi_vector::make_add_scaled( make_temporary_clone(exec, alpha).get(), make_temporary_clone(exec, b).get(), this)); } @@ -261,9 +263,9 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) template -void BatchMultiVector::compute_conj_dot( - ptr_param> b, - ptr_param> result) const +void MultiVector::compute_conj_dot( + ptr_param> b, + ptr_param> result) const { GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); @@ -272,16 +274,16 @@ void BatchMultiVector::compute_conj_dot( result->get_common_size(), get_col_sizes(this->get_size()).get_common_size()); auto exec = this->get_executor(); - exec->run(batch_multi_vector::make_compute_conj_dot( + exec->run(multi_vector::make_compute_conj_dot( this, make_temporary_clone(exec, b).get(), make_temporary_output_clone(exec, result).get())); } template -void BatchMultiVector::compute_dot( - ptr_param> b, - ptr_param> result) const +void MultiVector::compute_dot( + ptr_param> b, + ptr_param> result) const { GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQUAL_DIMENSIONS(this->get_common_size(), b->get_common_size()); @@ -290,15 +292,15 @@ void BatchMultiVector::compute_dot( result->get_common_size(), get_col_sizes(this->get_size()).get_common_size()); auto exec = this->get_executor(); - exec->run(batch_multi_vector::make_compute_dot( + exec->run(multi_vector::make_compute_dot( this, make_temporary_clone(exec, b).get(), make_temporary_output_clone(exec, result).get())); 
} template -void BatchMultiVector::compute_norm2( - ptr_param>> result) const +void MultiVector::compute_norm2( + ptr_param>> result) const { GKO_ASSERT_EQ(this->get_num_batch_items(), result->get_num_batch_items()); GKO_ASSERT_EQUAL_DIMENSIONS( @@ -306,14 +308,14 @@ void BatchMultiVector::compute_norm2( get_col_sizes(this->get_size()).get_common_size()); auto exec = this->get_executor(); - exec->run(batch_multi_vector::make_compute_norm2( + exec->run(multi_vector::make_compute_norm2( this, make_temporary_output_clone(exec, result).get())); } template -void BatchMultiVector::convert_to( - BatchMultiVector>* result) const +void MultiVector::convert_to( + MultiVector>* result) const { result->values_ = this->values_; result->set_size(this->get_size()); @@ -321,8 +323,8 @@ void BatchMultiVector::convert_to( template -void BatchMultiVector::move_to( - BatchMultiVector>* result) +void MultiVector::move_to( + MultiVector>* result) { this->convert_to(result); } @@ -352,14 +354,14 @@ void read_impl(MatrixType* mtx, const std::vector& data) template -void BatchMultiVector::read(const std::vector& data) +void MultiVector::read(const std::vector& data) { read_impl(this, data); } template -void BatchMultiVector::read(const std::vector& data) +void MultiVector::read(const std::vector& data) { read_impl(this, data); } @@ -387,21 +389,22 @@ void write_impl(const MatrixType* mtx, std::vector& data) template -void BatchMultiVector::write(std::vector& data) const +void MultiVector::write(std::vector& data) const { write_impl(this, data); } template -void BatchMultiVector::write(std::vector& data) const +void MultiVector::write(std::vector& data) const { write_impl(this, data); } -#define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class BatchMultiVector<_type> +#define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class MultiVector<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR); +} // namespace batch } // namespace gko diff --git a/core/base/batch_multi_vector_kernels.hpp b/core/base/batch_multi_vector_kernels.hpp index 6eba9eac829..8603a2b9055 100644 --- a/core/base/batch_multi_vector_kernels.hpp +++ b/core/base/batch_multi_vector_kernels.hpp @@ -51,36 +51,36 @@ namespace kernels { #define GKO_DECLARE_BATCH_MULTI_VECTOR_SCALE_KERNEL(_type) \ void scale(std::shared_ptr exec, \ - const BatchMultiVector<_type>* alpha, \ - BatchMultiVector<_type>* x) + const batch::MultiVector<_type>* alpha, \ + batch::MultiVector<_type>* x) #define GKO_DECLARE_BATCH_MULTI_VECTOR_ADD_SCALED_KERNEL(_type) \ void add_scaled(std::shared_ptr exec, \ - const BatchMultiVector<_type>* alpha, \ - const BatchMultiVector<_type>* x, \ - BatchMultiVector<_type>* y) + const batch::MultiVector<_type>* alpha, \ + const batch::MultiVector<_type>* x, \ + batch::MultiVector<_type>* y) #define GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_DOT_KERNEL(_type) \ void compute_dot(std::shared_ptr exec, \ - const BatchMultiVector<_type>* x, \ - const BatchMultiVector<_type>* y, \ - BatchMultiVector<_type>* result) + const batch::MultiVector<_type>* x, \ + const batch::MultiVector<_type>* y, \ + batch::MultiVector<_type>* result) #define GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_CONJ_DOT_KERNEL(_type) \ void compute_conj_dot(std::shared_ptr exec, \ - const BatchMultiVector<_type>* x, \ - const BatchMultiVector<_type>* y, \ - BatchMultiVector<_type>* result) + const batch::MultiVector<_type>* x, \ + const batch::MultiVector<_type>* y, \ + batch::MultiVector<_type>* result) #define GKO_DECLARE_BATCH_MULTI_VECTOR_COMPUTE_NORM2_KERNEL(_type) \ void 
compute_norm2(std::shared_ptr exec, \ - const BatchMultiVector<_type>* x, \ - BatchMultiVector>* result) + const batch::MultiVector<_type>* x, \ + batch::MultiVector>* result) #define GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL(_type) \ void copy(std::shared_ptr exec, \ - const BatchMultiVector<_type>* x, \ - BatchMultiVector<_type>* result) + const batch::MultiVector<_type>* x, \ + batch::MultiVector<_type>* result) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index 9549c4eaaee..d22b64f3320 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -40,7 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -namespace batch_multi_vector { +namespace batch { +namespace multi_vector { /** @@ -77,24 +78,20 @@ struct uniform_batch { }; -} // namespace batch_multi_vector - - -namespace batch { +} // namespace multi_vector template -GKO_ATTRIBUTES GKO_INLINE gko::batch_multi_vector::batch_item -to_const(const gko::batch_multi_vector::batch_item& b) +GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item to_const( + const multi_vector::batch_item& b) { return {b.values, b.stride, b.num_rows, b.num_rhs}; } template -GKO_ATTRIBUTES GKO_INLINE - gko::batch_multi_vector::uniform_batch - to_const(const gko::batch_multi_vector::uniform_batch& ub) +GKO_ATTRIBUTES GKO_INLINE multi_vector::uniform_batch to_const( + const multi_vector::uniform_batch& ub) { return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_rhs}; } @@ -110,8 +107,8 @@ GKO_ATTRIBUTES GKO_INLINE * @param batch_idx The position of the desired object in the batch */ template -GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::batch_item batch_item( - const batch_multi_vector::uniform_batch& batch, +GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item batch_item( + const multi_vector::uniform_batch& batch, const size_type batch_idx) { return {batch.values + batch_idx * batch.stride * batch.num_rows, @@ -119,7 +116,7 @@ GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::batch_item batch_item( } template -GKO_ATTRIBUTES GKO_INLINE batch_multi_vector::batch_item batch_item( +GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item batch_item( ValueType* const batch_values, const int stride, const int num_rows, const int num_rhs, const size_type batch_idx) { diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 844d4825a7a..e87cedca913 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -45,14 +45,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
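// A minimal sketch of the copy/move/clone behaviour the renamed test fixture
// below exercises; the umbrella header <ginkgo/ginkgo.hpp> and the executor
// choice are assumptions, and the values mirror the fixture data.
#include <ginkgo/ginkgo.hpp>

void copy_move_clone_example()
{
    auto exec = gko::ReferenceExecutor::create();
    auto mtx = gko::batch::initialize<gko::batch::MultiVector<double>>(
        {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}},
         {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}},
        exec);
    auto copy = gko::batch::MultiVector<double>::create(exec);
    copy->copy_from(mtx.get());   // deep copy; mtx keeps its values
    auto cloned = mtx->clone();   // allocate-and-copy in one step
    mtx->move_to(copy.get());     // mtx's data is handed over to copy
}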
template -class BatchMultiVector : public ::testing::Test { +class MultiVector : public ::testing::Test { protected: using value_type = T; using DenseMtx = gko::matrix::Dense; using size_type = gko::size_type; - BatchMultiVector() + MultiVector() : exec(gko::ReferenceExecutor::create()), - mtx(gko::batch_initialize>( + mtx(gko::batch::initialize>( {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, exec)) @@ -60,7 +60,7 @@ class BatchMultiVector : public ::testing::Test { static void assert_equal_to_original_mtx( - const gko::BatchMultiVector* m) + const gko::batch::MultiVector* m) { ASSERT_NE(m->get_const_values(), nullptr); EXPECT_EQ(m->get_const_values()[0], value_type{-1.0}); @@ -80,7 +80,7 @@ class BatchMultiVector : public ::testing::Test { ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); } - static void assert_empty(gko::BatchMultiVector* m) + static void assert_empty(gko::batch::MultiVector* m) { ASSERT_EQ(m->get_num_batch_items(), 0); ASSERT_EQ(m->get_common_size(), gko::dim<2>{}); @@ -88,21 +88,21 @@ class BatchMultiVector : public ::testing::Test { } std::shared_ptr exec; - std::unique_ptr> mtx; + std::unique_ptr> mtx; }; -TYPED_TEST_SUITE(BatchMultiVector, gko::test::ValueTypes); +TYPED_TEST_SUITE(MultiVector, gko::test::ValueTypes); -TYPED_TEST(BatchMultiVector, CanBeEmpty) +TYPED_TEST(MultiVector, CanBeEmpty) { - auto empty = gko::BatchMultiVector::create(this->exec); + auto empty = gko::batch::MultiVector::create(this->exec); this->assert_empty(empty.get()); } -TYPED_TEST(BatchMultiVector, KnowsItsSizeAndValues) +TYPED_TEST(MultiVector, KnowsItsSizeAndValues) { ASSERT_NE(this->mtx->get_const_values(), nullptr); @@ -110,7 +110,7 @@ TYPED_TEST(BatchMultiVector, KnowsItsSizeAndValues) } -TYPED_TEST(BatchMultiVector, CanGetValuesForEntry) +TYPED_TEST(MultiVector, CanGetValuesForEntry) { using value_type = typename TestFixture::value_type; @@ -118,9 +118,9 @@ TYPED_TEST(BatchMultiVector, CanGetValuesForEntry) } -TYPED_TEST(BatchMultiVector, CanBeCopied) +TYPED_TEST(MultiVector, CanBeCopied) { - auto mtx_copy = gko::BatchMultiVector::create(this->exec); + auto mtx_copy = gko::batch::MultiVector::create(this->exec); mtx_copy->copy_from(this->mtx.get()); @@ -131,9 +131,9 @@ TYPED_TEST(BatchMultiVector, CanBeCopied) } -TYPED_TEST(BatchMultiVector, CanBeMoved) +TYPED_TEST(MultiVector, CanBeMoved) { - auto mtx_copy = gko::BatchMultiVector::create(this->exec); + auto mtx_copy = gko::batch::MultiVector::create(this->exec); this->mtx->move_to(mtx_copy.get()); @@ -141,7 +141,7 @@ TYPED_TEST(BatchMultiVector, CanBeMoved) } -TYPED_TEST(BatchMultiVector, CanBeCloned) +TYPED_TEST(MultiVector, CanBeCloned) { auto mtx_clone = this->mtx->clone(); @@ -150,7 +150,7 @@ TYPED_TEST(BatchMultiVector, CanBeCloned) } -TYPED_TEST(BatchMultiVector, CanBeCleared) +TYPED_TEST(MultiVector, CanBeCleared) { this->mtx->clear(); @@ -158,11 +158,11 @@ TYPED_TEST(BatchMultiVector, CanBeCleared) } -TYPED_TEST(BatchMultiVector, CanBeConstructedWithSize) +TYPED_TEST(MultiVector, CanBeConstructedWithSize) { using size_type = gko::size_type; - auto m = gko::BatchMultiVector::create( + auto m = gko::batch::MultiVector::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 4))); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -170,7 +170,7 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedWithSize) } -TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingData) +TYPED_TEST(MultiVector, CanBeConstructedFromExistingData) { using value_type = typename TestFixture::value_type; using size_type = gko::size_type; 
@@ -184,7 +184,7 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingData) 6.0, -3.0}; // clang-format on - auto m = gko::BatchMultiVector::create( + auto m = gko::batch::MultiVector::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), gko::array::view(this->exec, 8, data)); @@ -200,7 +200,7 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingData) } -TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingConstData) +TYPED_TEST(MultiVector, CanBeConstructedFromExistingConstData) { using value_type = typename TestFixture::value_type; using size_type = gko::size_type; @@ -214,7 +214,7 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingConstData) 6.0, -3.0}; // clang-format on - auto m = gko::BatchMultiVector::create_const( + auto m = gko::batch::MultiVector::create_const( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), gko::array::const_view(this->exec, 8, data)); @@ -230,7 +230,7 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromExistingConstData) } -TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatrices) +TYPED_TEST(MultiVector, CanBeConstructedFromDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -240,14 +240,14 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::BatchMultiVector::create( + auto m = gko::batch::MultiVector::create( this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); } -TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatricesByDuplication) +TYPED_TEST(MultiVector, CanBeConstructedFromDenseMatricesByDuplication) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -257,16 +257,16 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromDenseMatricesByDuplication) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::BatchMultiVector::create( + auto bat_m = gko::batch::MultiVector::create( this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); auto m = - gko::BatchMultiVector::create(this->exec, 3, mat1.get()); + gko::batch::MultiVector::create(this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } -TYPED_TEST(BatchMultiVector, CanBeConstructedFromBatchMultiVectorMatrices) +TYPED_TEST(MultiVector, CanBeConstructedFromMultiVectorMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -275,23 +275,24 @@ TYPED_TEST(BatchMultiVector, CanBeConstructedFromBatchMultiVectorMatrices) this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::BatchMultiVector::create( + auto m = gko::batch::MultiVector::create( this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::BatchMultiVector::create( + auto m_ref = gko::batch::MultiVector::create( this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), mat1.get(), mat2.get()}); - auto m2 = gko::BatchMultiVector::create(this->exec, 3, m.get()); + auto m2 = + gko::batch::MultiVector::create(this->exec, 3, m.get()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } -TYPED_TEST(BatchMultiVector, CanBeListConstructed) +TYPED_TEST(MultiVector, CanBeListConstructed) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch::initialize>( {{1.0, 2.0}, 
{1.0, 3.0}}, this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -303,11 +304,11 @@ TYPED_TEST(BatchMultiVector, CanBeListConstructed) } -TYPED_TEST(BatchMultiVector, CanBeListConstructedByCopies) +TYPED_TEST(MultiVector, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch::initialize>( 2, I({1.0, 2.0}), this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -319,12 +320,12 @@ TYPED_TEST(BatchMultiVector, CanBeListConstructedByCopies) } -TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructed) +TYPED_TEST(MultiVector, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; using T = value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch::initialize>( {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, this->exec); @@ -343,10 +344,10 @@ TYPED_TEST(BatchMultiVector, CanBeDoubleListConstructed) } -TYPED_TEST(BatchMultiVector, CanBeFilledWithValue) +TYPED_TEST(MultiVector, CanBeFilledWithValue) { using value_type = typename TestFixture::value_type; - auto m = gko::BatchMultiVector::create( + auto m = gko::batch::MultiVector::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>(3, 1))); m->fill(value_type(2.0)); @@ -362,7 +363,7 @@ TYPED_TEST(BatchMultiVector, CanBeFilledWithValue) } -TYPED_TEST(BatchMultiVector, CanBeUnbatchedIntoDenseMatrices) +TYPED_TEST(MultiVector, CanBeUnbatchedIntoDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -380,10 +381,10 @@ TYPED_TEST(BatchMultiVector, CanBeUnbatchedIntoDenseMatrices) } -TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixData) +TYPED_TEST(MultiVector, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; - auto m = gko::BatchMultiVector::create(this->exec); + auto m = gko::batch::MultiVector::create(this->exec); // clang-format off m->read({gko::matrix_data{{2, 2}, {{0, 0, 1.0}, @@ -409,10 +410,10 @@ TYPED_TEST(BatchMultiVector, CanBeReadFromMatrixData) } -TYPED_TEST(BatchMultiVector, CanBeReadFromSparseMatrixData) +TYPED_TEST(MultiVector, CanBeReadFromSparseMatrixData) { using value_type = typename TestFixture::value_type; - auto m = gko::BatchMultiVector::create(this->exec); + auto m = gko::batch::MultiVector::create(this->exec); // clang-format off m->read({gko::matrix_data{{2, 2}, @@ -437,7 +438,7 @@ TYPED_TEST(BatchMultiVector, CanBeReadFromSparseMatrixData) } -TYPED_TEST(BatchMultiVector, GeneratesCorrectMatrixData) +TYPED_TEST(MultiVector, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; using tpl = typename gko::matrix_data::nonzero_type; diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index 44da77244f7..bae78912a6c 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -752,7 +752,7 @@ ::testing::AssertionResult batch_matrices_near( std::initializer_list> second, double tolerance) { auto second_mtx = - batch_initialize>>( + batch::initialize>>( second, first->get_executor()->get_master()); return batch_matrices_near( first_expression, detail::remove_list_wrapper(second_expression), diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index 3e44b006552..7729d006b75 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -57,7 +57,7 @@ namespace gko { namespace kernels { 
namespace cuda { /** - * @brief The BatchMultiVector matrix format namespace. + * @brief The MultiVector matrix format namespace. * * @ingroup batch_multi_vector */ diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 600cccc622b..715332418fb 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -62,8 +62,8 @@ namespace cuda { * Generates an immutable uniform batch struct from a batch of multi-vectors. */ template -inline gko::batch_multi_vector::uniform_batch> -get_batch_struct(const BatchMultiVector* const op) +inline batch::multi_vector::uniform_batch> +get_batch_struct(const batch::MultiVector* const op) { return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -75,8 +75,8 @@ get_batch_struct(const BatchMultiVector* const op) * Generates a uniform batch struct from a batch of multi-vectors. */ template -inline gko::batch_multi_vector::uniform_batch> -get_batch_struct(BatchMultiVector* const op) +inline batch::multi_vector::uniform_batch> +get_batch_struct(batch::MultiVector* const op) { return {as_cuda_type(op->get_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 3542fc5ebad..f307b6ba240 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -58,8 +58,8 @@ namespace gko { namespace kernels { namespace dpcpp { /** - * @brief The BatchMultiVector matrix format namespace. - * @ref BatchMultiVector + * @brief The MultiVector matrix format namespace. + * @ref MultiVector * @ingroup batch_multi_vector */ namespace batch_multi_vector { @@ -70,8 +70,8 @@ namespace batch_multi_vector { template void scale(std::shared_ptr exec, - const BatchMultiVector* const alpha, - BatchMultiVector* const x) + const batch::MultiVector* const alpha, + batch::MultiVector* const x) { const auto alpha_ub = get_batch_struct(alpha); const auto x_ub = get_batch_struct(x); @@ -118,9 +118,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void add_scaled(std::shared_ptr exec, - const BatchMultiVector* const alpha, - const BatchMultiVector* const x, - BatchMultiVector* const y) + const batch::MultiVector* const alpha, + const batch::MultiVector* const x, + batch::MultiVector* const y) { const size_type num_rows = x->get_common_size()[0]; const size_type num_cols = x->get_common_size()[1]; @@ -170,9 +170,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_dot(std::shared_ptr exec, - const BatchMultiVector* const x, - const BatchMultiVector* const y, - BatchMultiVector* const result) + const batch::MultiVector* const x, + const batch::MultiVector* const y, + batch::MultiVector* const result) { const auto x_ub = get_batch_struct(x); const auto y_ub = get_batch_struct(y); @@ -209,9 +209,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_conj_dot(std::shared_ptr exec, - const BatchMultiVector* const x, - const BatchMultiVector* const y, - BatchMultiVector* const result) + const batch::MultiVector* const x, + const batch::MultiVector* const y, + batch::MultiVector* const result) { const auto x_ub = get_batch_struct(x); const auto y_ub = get_batch_struct(y); @@ -248,8 +248,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_norm2(std::shared_ptr exec, - const BatchMultiVector* const x, - BatchMultiVector>* const result) + const batch::MultiVector* const x, + batch::MultiVector>* const result) { 
const auto x_ub = get_batch_struct(x); const auto res_ub = get_batch_struct(result); @@ -282,8 +282,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void copy(std::shared_ptr exec, - const BatchMultiVector* x, - BatchMultiVector* result) + const batch::MultiVector* x, + batch::MultiVector* result) { const auto x_ub = get_batch_struct(x); const auto result_ub = get_batch_struct(result); diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index c328a50465a..22d00d780f9 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -32,8 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __dpct_inline__ void scale_kernel( - const gko::batch_multi_vector::batch_item& alpha, - const gko::batch_multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::multi_vector::batch_item& x, sycl::nd_item<3>& item_ct1, Mapping map) { const int max_li = x.num_rows * x.num_rhs; @@ -50,9 +50,9 @@ __dpct_inline__ void scale_kernel( template __dpct_inline__ void add_scaled_kernel( - const gko::batch_multi_vector::batch_item& alpha, - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item& y, + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item& y, sycl::nd_item<3>& item_ct1, Mapping map) { const int max_li = x.num_rows * x.num_rhs; @@ -69,9 +69,9 @@ __dpct_inline__ void add_scaled_kernel( template __dpct_inline__ void compute_gen_dot_product_kernel( - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item& y, - const gko::batch_multi_vector::batch_item& result, + const gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item& y, + const gko::batch::multi_vector::batch_item& result, sycl::nd_item<3>& item_ct1, Mapping conj_map) { constexpr auto tile_size = config::warp_size; @@ -104,8 +104,8 @@ __dpct_inline__ void compute_gen_dot_product_kernel( template __dpct_inline__ void compute_norm2_kernel( - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item>& + const gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item>& result, sycl::nd_item<3>& item_ct1) { @@ -138,8 +138,8 @@ __dpct_inline__ void compute_norm2_kernel( template __dpct_inline__ void copy_kernel( - const gko::batch_multi_vector::batch_item& in, - const gko::batch_multi_vector::batch_item& out, + const gko::batch::multi_vector::batch_item& in, + const gko::batch::multi_vector::batch_item& out, sycl::nd_item<3>& item_ct1) { for (int iz = item_ct1.get_local_linear_id(); iz < in.num_rows * in.num_rhs; diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index ff3a6a87ade..9c752a94b4f 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -61,8 +61,8 @@ namespace dpcpp { * Generates an immutable uniform batch struct from a batch of multi-vectors. 
*/ template -inline gko::batch_multi_vector::uniform_batch get_batch_struct( - const BatchMultiVector* const op) +inline batch::multi_vector::uniform_batch get_batch_struct( + const batch::MultiVector* const op) { return {op->get_const_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -75,8 +75,8 @@ inline gko::batch_multi_vector::uniform_batch get_batch_struct( * Generates a uniform batch struct from a batch of multi-vectors. */ template -inline gko::batch_multi_vector::uniform_batch get_batch_struct( - BatchMultiVector* const op) +inline batch::multi_vector::uniform_batch get_batch_struct( + batch::MultiVector* const op) { return {op->get_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/hip/base/batch_multi_vector_kernels.hip.cpp b/hip/base/batch_multi_vector_kernels.hip.cpp index bb465ac7709..f59d873840c 100644 --- a/hip/base/batch_multi_vector_kernels.hip.cpp +++ b/hip/base/batch_multi_vector_kernels.hip.cpp @@ -58,7 +58,7 @@ namespace gko { namespace kernels { namespace hip { /** - * @brief The BatchMultiVector matrix format namespace. + * @brief The MultiVector matrix format namespace. * * @ingroup batch_multi_vector */ diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index 1732505bc6f..442260e50e6 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -62,8 +62,8 @@ namespace hip { * Generates an immutable uniform batch struct from a batch of multi-vectors. */ template -inline gko::batch_multi_vector::uniform_batch> -get_batch_struct(const BatchMultiVector* const op) +inline batch::multi_vector::uniform_batch> +get_batch_struct(const batch::MultiVector* const op) { return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -75,8 +75,8 @@ get_batch_struct(const BatchMultiVector* const op) * Generates a uniform batch struct from a batch of multi-vectors. */ template -inline gko::batch_multi_vector::uniform_batch> -get_batch_struct(BatchMultiVector* const op) +inline batch::multi_vector::uniform_batch> get_batch_struct( + batch::MultiVector* const op) { return {as_hip_type(op->get_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index a502a701307..0e011f6b3ef 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -52,9 +52,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { +namespace batch { /** - * BatchMultiVector stores multiple vectors in a batched fashion and is useful + * MultiVector stores multiple vectors in a batched fashion and is useful * for batched operations. 
For example, if you want to store two batch items * with multi-vectors of size (3 x 2) given below: * @@ -77,50 +78,49 @@ namespace gko { * @ingroup batched */ template -class BatchMultiVector - : public EnablePolymorphicObject>, - public EnablePolymorphicAssignment>, - public EnableCreateMethod>, - public ConvertibleTo>>, +class MultiVector + : public EnablePolymorphicObject>, + public EnablePolymorphicAssignment>, + public EnableCreateMethod>, + public ConvertibleTo>>, public BatchReadableFromMatrixData, public BatchReadableFromMatrixData, public BatchWritableToMatrixData, public BatchWritableToMatrixData { - friend class EnableCreateMethod; - friend class EnablePolymorphicObject; - friend class BatchMultiVector>; - friend class BatchMultiVector>; + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class MultiVector>; + friend class MultiVector>; public: using BatchReadableFromMatrixData::read; using BatchReadableFromMatrixData::read; - using EnablePolymorphicAssignment::convert_to; - using EnablePolymorphicAssignment::move_to; - using ConvertibleTo< - BatchMultiVector>>::convert_to; - using ConvertibleTo>>::move_to; + using EnablePolymorphicAssignment::convert_to; + using EnablePolymorphicAssignment::move_to; + using ConvertibleTo>>::convert_to; + using ConvertibleTo>>::move_to; using value_type = ValueType; using index_type = int32; using unbatch_type = matrix::Dense; using mat_data = matrix_data; using mat_data64 = matrix_data; - using absolute_type = remove_complex>; - using complex_type = to_complex>; + using absolute_type = remove_complex>; + using complex_type = to_complex>; /** - * Creates a BatchMultiVector with the configuration of another - * BatchMultiVector. + * Creates a MultiVector with the configuration of another + * MultiVector. * * @param other The other multi-vector whose configuration needs to copied. */ - static std::unique_ptr create_with_config_of( - ptr_param other); + static std::unique_ptr create_with_config_of( + ptr_param other); void convert_to( - BatchMultiVector>* result) const override; + MultiVector>* result) const override; - void move_to(BatchMultiVector>* result) override; + void move_to(MultiVector>* result) override; void read(const std::vector& data) override; @@ -246,7 +246,7 @@ class BatchMultiVector } /** - * @copydoc BatchMultiVector::at(size_type, size_type, size_type) + * @copydoc MultiVector::at(size_type, size_type, size_type) */ value_type at(size_type batch_id, size_type row, size_type col) const { @@ -274,7 +274,7 @@ class BatchMultiVector } /** - * @copydoc BatchMultiVector::at(size_type, size_type, size_type) + * @copydoc MultiVector::at(size_type, size_type, size_type) */ ValueType at(size_type batch_id, size_type idx) const noexcept { @@ -286,13 +286,13 @@ class BatchMultiVector * * @param alpha the scalar * - * @note If alpha is 1x1 BatchMultiVector matrix, the entire multi-vector - * (all batches) is scaled by alpha. If it is a BatchMultiVector row + * @note If alpha is 1x1 MultiVector matrix, the entire multi-vector + * (all batches) is scaled by alpha. If it is a MultiVector row * vector of values, then i-th column of the vector is scaled with the * i-th element of alpha (the number of columns of alpha has to match * the number of columns of the multi-vector). */ - void scale(ptr_param> alpha); + void scale(ptr_param> alpha); /** * Adds `b` scaled by `alpha` to the vector (aka: BLAS axpy). 
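// A minimal usage sketch of the renamed interface declared above; the
// umbrella header <ginkgo/ginkgo.hpp> and the ReferenceExecutor are
// assumptions, and the values mirror the test fixtures in this patch.
#include <ginkgo/ginkgo.hpp>

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    // Two batch items, each holding a 2 x 3 multi-vector.
    auto x = gko::batch::initialize<gko::batch::MultiVector<double>>(
        {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}},
         {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}},
        exec);
    // One 1x1 scalar per batch item scales every entry of that item.
    auto alpha = gko::batch::initialize<gko::batch::MultiVector<double>>(
        {{2.0}, {-2.0}}, exec);
    x->scale(alpha.get());
    return 0;
}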
@@ -300,48 +300,48 @@ class BatchMultiVector * @param alpha the scalar * @param b a multi-vector of the same dimension as this * - * @note If alpha is 1x1 BatchMultiVector matrix, the entire multi-vector - * (all batches) is scaled by alpha. If it is a BatchMultiVector row + * @note If alpha is 1x1 MultiVector matrix, the entire multi-vector + * (all batches) is scaled by alpha. If it is a MultiVector row * vector of values, then i-th column of the vector is scaled with the * i-th element of alpha (the number of columns of alpha has to match * the number of columns of the multi-vector). */ - void add_scaled(ptr_param> alpha, - ptr_param> b); + void add_scaled(ptr_param> alpha, + ptr_param> b); /** * Computes the column-wise dot product of each multi-vector in this batch * and its corresponding entry in `b`. * - * @param b a BatchMultiVector of same dimension as this - * @param result a BatchMultiVector row vector, used to store the dot + * @param b a MultiVector of same dimension as this + * @param result a MultiVector row vector, used to store the dot * product */ - void compute_dot(ptr_param> b, - ptr_param> result) const; + void compute_dot(ptr_param> b, + ptr_param> result) const; /** * Computes the column-wise conjugate dot product of each multi-vector in * this batch and its corresponding entry in `b`. If the vector has complex * value_type, then the conjugate of this is taken. * - * @param b a BatchMultiVector of same dimension as this - * @param result a BatchMultiVector row vector, used to store the dot + * @param b a MultiVector of same dimension as this + * @param result a MultiVector row vector, used to store the dot * product (the number of column in the vector must match the * number of columns of this) */ - void compute_conj_dot(ptr_param> b, - ptr_param> result) const; + void compute_conj_dot(ptr_param> b, + ptr_param> result) const; /** * Computes the Euclidean (L^2) norm of each multi-vector in this batch. * - * @param result a BatchMultiVector, used to store the norm + * @param result a MultiVector, used to store the norm * (the number of columns in the vector must match the number * of columns of this) */ void compute_norm2( - ptr_param>> result) const; + ptr_param>> result) const; /** * Creates a constant (immutable) batch multi-vector from a constant @@ -356,12 +356,12 @@ class BatchMultiVector * array (if it resides on the same executor as the vector) or a copy of the * array on the correct executor. */ - static std::unique_ptr> create_const( + static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values); /** - * Fills the input BatchMultiVector with a given value + * Fills the input MultiVector with a given value * * @param value the value to be filled */ @@ -375,7 +375,7 @@ class BatchMultiVector protected: /** - * Sets the size of the BatchMultiVector. + * Sets the size of the MultiVector. * * @param value the new size of the operator */ @@ -388,11 +388,11 @@ class BatchMultiVector * @param exec Executor associated to the vector * @param size size of the batch multi vector */ - BatchMultiVector(std::shared_ptr exec, - const batch_dim<2>& size = batch_dim<2>{}); + MultiVector(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}); /** - * Creates a BatchMultiVector from an already allocated (and + * Creates a MultiVector from an already allocated (and * initialized) array. 
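// A minimal sketch of the BLAS-like operations documented above, under the
// same assumptions as the previous example: x, y and alpha are conforming
// batches, with alpha holding one 1x1 scalar per batch item.
#include <ginkgo/ginkgo.hpp>

void blas_like_ops(std::shared_ptr<const gko::Executor> exec,
                   gko::batch::MultiVector<double>* x,
                   const gko::batch::MultiVector<double>* y,
                   const gko::batch::MultiVector<double>* alpha)
{
    // x := x + alpha * y, applied item-wise.
    x->add_scaled(alpha, y);
    // Dot products and norms are column-wise, so the results have one row
    // per batch item and as many columns as x.
    auto result_size = gko::batch_dim<2>(
        x->get_num_batch_items(), gko::dim<2>(1, x->get_common_size()[1]));
    auto dots = gko::batch::MultiVector<double>::create(exec, result_size);
    x->compute_dot(y, dots.get());
    auto norms = gko::batch::MultiVector<double>::create(exec, result_size);
    x->compute_norm2(norms.get());
}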
* * @tparam ValuesArray type of array of values @@ -406,9 +406,9 @@ class BatchMultiVector * original array data will not be used in the vector. */ template - BatchMultiVector(std::shared_ptr exec, - const batch_dim<2>& size, ValuesArray&& values) - : EnablePolymorphicObject>(exec), + MultiVector(std::shared_ptr exec, const batch_dim<2>& size, + ValuesArray&& values) + : EnablePolymorphicObject>(exec), batch_size_(size), values_{exec, std::forward(values)} { @@ -418,7 +418,7 @@ class BatchMultiVector } /** - * Creates a BatchMultiVector from a vector of matrices + * Creates a MultiVector from a vector of matrices * * @param exec Executor associated to the vector * @param matrices The matrix::Dense objects that need to be batched. @@ -429,11 +429,11 @@ class BatchMultiVector * allocations and deep copies are necessary and hence this constructor must * not be used in performance sensitive applications */ - BatchMultiVector(std::shared_ptr exec, - const std::vector*>& matrices); + MultiVector(std::shared_ptr exec, + const std::vector*>& matrices); /** - * Creates a BatchMultiVector matrix by duplicating BatchMultiVector object + * Creates a MultiVector matrix by duplicating MultiVector object * * @param exec Executor associated to the vector * @param num_duplications The number of times to duplicate @@ -445,29 +445,29 @@ class BatchMultiVector * allocations and deep copies are necessary and hence this constructor must * not be used in performance sensitive applications. */ - BatchMultiVector(std::shared_ptr exec, - size_type num_duplications, - const BatchMultiVector* input); + MultiVector(std::shared_ptr exec, + size_type num_duplications, + const MultiVector* input); /** - * Creates a BatchMultiVector matrix by a duplicating a matrix::Dense object + * Creates a MultiVector matrix by a duplicating a matrix::Dense object * * @param exec Executor associated to the vector * @param num_duplications The number of times to duplicate * @param input the matrix to be duplicated. */ - BatchMultiVector(std::shared_ptr exec, - size_type num_duplications, - const matrix::Dense* input); + MultiVector(std::shared_ptr exec, + size_type num_duplications, + const matrix::Dense* input); /** - * Creates a BatchMultiVector with the same configuration as the + * Creates a MultiVector with the same configuration as the * callers object. * - * @returns a BatchMultiVector with the same configuration as the + * @returns a MultiVector with the same configuration as the * caller. */ - std::unique_ptr create_with_same_config() const; + std::unique_ptr create_with_same_config() const; size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept @@ -491,11 +491,11 @@ class BatchMultiVector /** * Creates and initializes a batch of single column-vectors. * - * This function first creates a temporary BatchMultiVector, fills it with + * This function first creates a temporary MultiVector, fills it with * passed in values, and then converts the vector to the requested type. 
* * @tparam Matrix matrix type to initialize - * (BatchMultiVector has to implement the ConvertibleTo + * (MultiVector has to implement the ConvertibleTo * interface) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) @@ -506,16 +506,16 @@ class BatchMultiVector * including the Executor, which is passed as the first * argument * - * @ingroup BatchMultiVector + * @ingroup MultiVector * @ingroup mat_formats */ template -std::unique_ptr batch_initialize( +std::unique_ptr initialize( std::initializer_list> vals, std::shared_ptr exec, TArgs&&... create_args) { - using batch_multi_vector = BatchMultiVector; + using batch_multi_vector = MultiVector; size_type num_batch_items = vals.size(); GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); auto vals_begin = begin(vals); @@ -544,7 +544,7 @@ std::unique_ptr batch_initialize( /** * Creates and initializes a batch of multi-vectors. * - * This function first creates a temporary BatchMultiVector, fills it with + * This function first creates a temporary MultiVector, fills it with * passed in values, and then converts the vector to the requested type. * * @tparam Matrix matrix type to initialize @@ -558,17 +558,17 @@ std::unique_ptr batch_initialize( * including the Executor, which is passed as the first * argument * - * @ingroup BatchMultiVector + * @ingroup MultiVector * @ingroup mat_formats */ template -std::unique_ptr batch_initialize( +std::unique_ptr initialize( std::initializer_list>> vals, std::shared_ptr exec, TArgs&&... create_args) { - using batch_multi_vector = BatchMultiVector; + using batch_multi_vector = MultiVector; size_type num_batch_items = vals.size(); GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); auto vals_begin = begin(vals); @@ -612,7 +612,7 @@ std::unique_ptr batch_initialize( * passed in values, and then converts the vector to the requested type. * * @tparam Matrix matrix type to initialize - * (BatchMultiVector has to implement the ConvertibleTo + * (MultiVector has to implement the ConvertibleTo * interface) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) @@ -624,16 +624,16 @@ std::unique_ptr batch_initialize( * including the Executor, which is passed as the first * argument * - * @ingroup BatchMultiVector + * @ingroup MultiVector * @ingroup mat_formats */ template -std::unique_ptr batch_initialize( +std::unique_ptr initialize( const size_type num_vectors, std::initializer_list vals, std::shared_ptr exec, TArgs&&... create_args) { - using batch_multi_vector = BatchMultiVector; + using batch_multi_vector = MultiVector; size_type num_batch_items = num_vectors; GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, "Input data is empty"); @@ -660,7 +660,7 @@ std::unique_ptr batch_initialize( * passed in values, and then converts the vector to the requested type. * * @tparam Matrix matrix type to initialize - * (BatchMultiVector has to implement the ConvertibleTo + * (MultiVector has to implement the ConvertibleTo * interface) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) @@ -676,13 +676,13 @@ std::unique_ptr batch_initialize( * @ingroup mat_formats */ template -std::unique_ptr batch_initialize( +std::unique_ptr initialize( const size_type num_batch_items, std::initializer_list> vals, std::shared_ptr exec, TArgs&&... 
create_args) { - using batch_multi_vector = BatchMultiVector; + using batch_multi_vector = MultiVector; GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, "Input data is empty"); auto common_size = dim<2>(begin(vals) ? vals.size() : 0, @@ -706,6 +706,7 @@ std::unique_ptr batch_initialize( } +} // namespace batch } // namespace gko diff --git a/omp/base/batch_multi_vector_kernels.cpp b/omp/base/batch_multi_vector_kernels.cpp index 057efe5f05c..deef105db0d 100644 --- a/omp/base/batch_multi_vector_kernels.cpp +++ b/omp/base/batch_multi_vector_kernels.cpp @@ -49,8 +49,8 @@ namespace gko { namespace kernels { namespace omp { /** - * @brief The BatchMultiVector matrix format namespace. - * @ref BatchMultiVector + * @brief The batch::MultiVector matrix format namespace. + * @ref batch::MultiVector * @ingroup batch_multi_vector */ namespace batch_multi_vector { @@ -61,8 +61,8 @@ namespace batch_multi_vector { template void scale(std::shared_ptr exec, - const BatchMultiVector* const alpha, - BatchMultiVector* const x) + const batch::MultiVector* const alpha, + batch::MultiVector* const x) { const auto x_ub = host::get_batch_struct(x); const auto alpha_ub = host::get_batch_struct(alpha); @@ -80,9 +80,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void add_scaled(std::shared_ptr exec, - const BatchMultiVector* const alpha, - const BatchMultiVector* const x, - BatchMultiVector* const y) + const batch::MultiVector* const alpha, + const batch::MultiVector* const x, + batch::MultiVector* const y) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -102,9 +102,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_dot(std::shared_ptr exec, - const BatchMultiVector* const x, - const BatchMultiVector* const y, - BatchMultiVector* const result) + const batch::MultiVector* const x, + const batch::MultiVector* const y, + batch::MultiVector* const result) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -124,9 +124,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_conj_dot(std::shared_ptr exec, - const BatchMultiVector* const x, - const BatchMultiVector* const y, - BatchMultiVector* const result) + const batch::MultiVector* const x, + const batch::MultiVector* const y, + batch::MultiVector* const result) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); @@ -146,8 +146,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_norm2(std::shared_ptr exec, - const BatchMultiVector* const x, - BatchMultiVector>* const result) + const batch::MultiVector* const x, + batch::MultiVector>* const result) { const auto x_ub = host::get_batch_struct(x); const auto res_ub = host::get_batch_struct(result); @@ -165,8 +165,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void copy(std::shared_ptr exec, - const BatchMultiVector* x, - BatchMultiVector* result) + const batch::MultiVector* x, + batch::MultiVector* result) { const auto x_ub = host::get_batch_struct(x); const auto result_ub = host::get_batch_struct(result); diff --git a/reference/base/batch_multi_vector_kernels.cpp b/reference/base/batch_multi_vector_kernels.cpp index b5cdb03d214..076fd87778d 100644 --- a/reference/base/batch_multi_vector_kernels.cpp +++ b/reference/base/batch_multi_vector_kernels.cpp @@ -49,8 +49,8 @@ namespace gko { namespace kernels { namespace reference { /** - * @brief The BatchMultiVector matrix format namespace. 
- * @ref BatchMultiVector + * @brief The batch::MultiVector matrix format namespace. + * @ref batch::MultiVector * @ingroup batch_multi_vector */ namespace batch_multi_vector { @@ -61,14 +61,14 @@ namespace batch_multi_vector { template void scale(std::shared_ptr exec, - const BatchMultiVector* alpha, - BatchMultiVector* x) + const batch::MultiVector* alpha, + batch::MultiVector* x) { const auto x_ub = host::get_batch_struct(x); const auto alpha_ub = host::get_batch_struct(alpha); for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto alpha_b = gko::batch::batch_item(alpha_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto alpha_b = batch::batch_item(alpha_ub, batch); + const auto x_b = batch::batch_item(x_ub, batch); scale_kernel(alpha_b, x_b); } } @@ -79,17 +79,17 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void add_scaled(std::shared_ptr exec, - const BatchMultiVector* alpha, - const BatchMultiVector* x, - BatchMultiVector* y) + const batch::MultiVector* alpha, + const batch::MultiVector* x, + batch::MultiVector* y) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); const auto alpha_ub = host::get_batch_struct(alpha); for (size_type batch = 0; batch < y->get_num_batch_items(); ++batch) { - const auto alpha_b = gko::batch::batch_item(alpha_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); - const auto y_b = gko::batch::batch_item(y_ub, batch); + const auto alpha_b = batch::batch_item(alpha_ub, batch); + const auto x_b = batch::batch_item(x_ub, batch); + const auto y_b = batch::batch_item(y_ub, batch); add_scaled_kernel(alpha_b, x_b, y_b); } } @@ -100,17 +100,17 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_dot(std::shared_ptr exec, - const BatchMultiVector* x, - const BatchMultiVector* y, - BatchMultiVector* result) + const batch::MultiVector* x, + const batch::MultiVector* y, + batch::MultiVector* result) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); const auto res_ub = host::get_batch_struct(result); for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { - const auto res_b = gko::batch::batch_item(res_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); - const auto y_b = gko::batch::batch_item(y_ub, batch); + const auto res_b = batch::batch_item(res_ub, batch); + const auto x_b = batch::batch_item(x_ub, batch); + const auto y_b = batch::batch_item(y_ub, batch); compute_dot_product_kernel(x_b, y_b, res_b); } } @@ -121,17 +121,17 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void compute_conj_dot(std::shared_ptr exec, - const BatchMultiVector* x, - const BatchMultiVector* y, - BatchMultiVector* result) + const batch::MultiVector* x, + const batch::MultiVector* y, + batch::MultiVector* result) { const auto x_ub = host::get_batch_struct(x); const auto y_ub = host::get_batch_struct(y); const auto res_ub = host::get_batch_struct(result); for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { - const auto res_b = gko::batch::batch_item(res_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); - const auto y_b = gko::batch::batch_item(y_ub, batch); + const auto res_b = batch::batch_item(res_ub, batch); + const auto x_b = batch::batch_item(x_ub, batch); + const auto y_b = batch::batch_item(y_ub, batch); compute_conj_dot_product_kernel(x_b, y_b, res_b); } } @@ -142,14 +142,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void 
compute_norm2(std::shared_ptr exec, - const BatchMultiVector* x, - BatchMultiVector>* result) + const batch::MultiVector* x, + batch::MultiVector>* result) { const auto x_ub = host::get_batch_struct(x); const auto res_ub = host::get_batch_struct(result); for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { - const auto res_b = gko::batch::batch_item(res_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto res_b = batch::batch_item(res_ub, batch); + const auto x_b = batch::batch_item(x_ub, batch); compute_norm2_kernel(x_b, res_b); } } @@ -160,14 +160,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void copy(std::shared_ptr exec, - const BatchMultiVector* x, - BatchMultiVector* result) + const batch::MultiVector* x, + batch::MultiVector* result) { const auto x_ub = host::get_batch_struct(x); const auto result_ub = host::get_batch_struct(result); for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto result_b = gko::batch::batch_item(result_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto result_b = batch::batch_item(result_ub, batch); + const auto x_b = batch::batch_item(x_ub, batch); copy_kernel(x_b, result_b); } } diff --git a/reference/base/batch_multi_vector_kernels.hpp.inc b/reference/base/batch_multi_vector_kernels.hpp.inc index a6935866f56..a14b18ec9f7 100644 --- a/reference/base/batch_multi_vector_kernels.hpp.inc +++ b/reference/base/batch_multi_vector_kernels.hpp.inc @@ -32,8 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template inline void scale_kernel( - const gko::batch_multi_vector::batch_item& alpha, - const gko::batch_multi_vector::batch_item& x) + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::multi_vector::batch_item& x) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -53,9 +53,9 @@ inline void scale_kernel( template inline void add_scaled_kernel( - const gko::batch_multi_vector::batch_item& alpha, - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item& y) + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item& y) { if (alpha.num_rhs == 1) { for (int i = 0; i < x.num_rows; ++i) { @@ -77,9 +77,9 @@ inline void add_scaled_kernel( template inline void compute_dot_product_kernel( - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item& y, - const gko::batch_multi_vector::batch_item& result) + const gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item& y, + const gko::batch::multi_vector::batch_item& result) { for (int c = 0; c < result.num_rhs; c++) { result.values[c] = gko::zero(); @@ -96,9 +96,9 @@ inline void compute_dot_product_kernel( template inline void compute_conj_dot_product_kernel( - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item& y, - const gko::batch_multi_vector::batch_item& result) + const gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item& y, + const gko::batch::multi_vector::batch_item& result) { for (int c = 0; c < result.num_rhs; c++) { result.values[c] = gko::zero(); @@ -115,8 +115,8 @@ inline void compute_conj_dot_product_kernel( template inline void compute_norm2_kernel( - const gko::batch_multi_vector::batch_item& x, - const gko::batch_multi_vector::batch_item>& + const 
gko::batch::multi_vector::batch_item& x, + const gko::batch::multi_vector::batch_item>& result) { for (int j = 0; j < x.num_rhs; ++j) { @@ -141,8 +141,8 @@ inline void compute_norm2_kernel( */ template inline void copy_kernel( - const gko::batch_multi_vector::batch_item& in, - const gko::batch_multi_vector::batch_item& out) + const gko::batch::multi_vector::batch_item& in, + const gko::batch::multi_vector::batch_item& out) { for (int iz = 0; iz < in.num_rows * in.num_rhs; iz++) { const int i = iz / in.num_rhs; diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index 21ff280baba..ce7c7af5605 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -63,8 +63,8 @@ namespace host { * Generates an immutable uniform batch struct from a batch of multi-vectors. */ template -inline gko::batch_multi_vector::uniform_batch get_batch_struct( - const BatchMultiVector* const op) +inline batch::multi_vector::uniform_batch get_batch_struct( + const batch::MultiVector* const op) { return {op->get_const_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -77,8 +77,8 @@ inline gko::batch_multi_vector::uniform_batch get_batch_struct( * Generates a uniform batch struct from a batch of multi-vectors. */ template -inline gko::batch_multi_vector::uniform_batch get_batch_struct( - BatchMultiVector* const op) +inline batch::multi_vector::uniform_batch get_batch_struct( + batch::MultiVector* const op) { return {op->get_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index 62567cc91ee..82429660b32 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -52,16 +52,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
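// A minimal sketch of the kernel-level interface these reference tests call
// directly: the kernel namespace keeps its batch_multi_vector name, but the
// arguments are now gko::batch::MultiVector pointers. The internal kernels
// header path and the executor choice are assumptions.
#include <ginkgo/ginkgo.hpp>

#include "core/base/batch_multi_vector_kernels.hpp"

void kernel_copy_example()
{
    auto exec = gko::ReferenceExecutor::create();
    auto src = gko::batch::initialize<gko::batch::MultiVector<double>>(
        {{1.0, 2.0}, {1.0, 3.0}}, exec);
    auto dst = gko::batch::MultiVector<double>::create(exec, src->get_size());
    gko::kernels::reference::batch_multi_vector::copy(exec, src.get(),
                                                      dst.get());
}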
template -class BatchMultiVector : public ::testing::Test { +class MultiVector : public ::testing::Test { protected: using value_type = T; using size_type = gko::size_type; - using Mtx = gko::BatchMultiVector; + using Mtx = gko::batch::MultiVector; using DenseMtx = gko::matrix::Dense; using ComplexMtx = gko::to_complex; - BatchMultiVector() + MultiVector() : exec(gko::ReferenceExecutor::create()), - mtx_0(gko::batch_initialize( + mtx_0(gko::batch::initialize( {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, exec)), @@ -69,15 +69,15 @@ class BatchMultiVector : public ::testing::Test { {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), mtx_01(gko::initialize( {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), - mtx_1( - gko::batch_initialize({{{1.0, -1.0, 2.2}, {-2.0, 2.0, -0.5}}, - {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, - exec)), + mtx_1(gko::batch::initialize( + {{{1.0, -1.0, 2.2}, {-2.0, 2.0, -0.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)), mtx_10(gko::initialize( {I({1.0, -1.0, 2.2}), I({-2.0, 2.0, -0.5})}, exec)), mtx_11(gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)), - mtx_2(gko::batch_initialize( + mtx_2(gko::batch::initialize( {{{1.0, 1.5}, {6.0, 1.0}, {-0.25, 1.0}}, {I({2.0, -2.0}), I({1.0, 3.0}), I({4.0, 3.0})}}, exec)), @@ -85,22 +85,22 @@ class BatchMultiVector : public ::testing::Test { {I({1.0, 1.5}), I({6.0, 1.0}), I({-0.25, 1.0})}, exec)), mtx_21(gko::initialize( {I({2.0, -2.0}), I({1.0, 3.0}), I({4.0, 3.0})}, exec)), - mtx_3(gko::batch_initialize( + mtx_3(gko::batch::initialize( {{I({1.0, 1.5}), I({6.0, 1.0})}, {{2.0, -2.0}, {1.0, 3.0}}}, exec)), mtx_30(gko::initialize({I({1.0, 1.5}), I({6.0, 1.0})}, exec)), mtx_31(gko::initialize( {I({2.0, -2.0}), I({1.0, 3.0})}, exec)), - mtx_4(gko::batch_initialize( + mtx_4(gko::batch::initialize( {{{1.0, 1.5, 3.0}, {6.0, 1.0, 5.0}, {6.0, 1.0, 5.5}}, {{2.0, -2.0, 1.5}, {4.0, 3.0, 2.2}, {-1.25, 3.0, 0.5}}}, exec)), - mtx_5(gko::batch_initialize( + mtx_5(gko::batch::initialize( {{{1.0, 1.5}, {6.0, 1.0}, {7.0, -4.5}}, {I({2.0, -2.0}), I({1.0, 3.0}), I({4.0, 3.0})}}, exec)), - mtx_6(gko::batch_initialize( + mtx_6(gko::batch::initialize( {{{1.0, 0.0, 3.0}, {0.0, 3.0, 0.0}, {0.0, 1.0, 5.0}}, {{2.0, 0.0, 5.0}, {0.0, 1.0, 0.0}, {0.0, -1.0, 8.0}}}, exec)) @@ -126,14 +126,14 @@ class BatchMultiVector : public ::testing::Test { std::default_random_engine rand_engine; }; -TYPED_TEST_SUITE(BatchMultiVector, gko::test::ValueTypes); +TYPED_TEST_SUITE(MultiVector, gko::test::ValueTypes); -TYPED_TEST(BatchMultiVector, ScalesData) +TYPED_TEST(MultiVector, ScalesData) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; - auto alpha = gko::batch_initialize( + auto alpha = gko::batch::initialize( {{{2.0, -2.0, 1.5}}, {{3.0, -1.0, 0.25}}}, this->exec); auto ualpha = alpha->unbatch(); @@ -147,11 +147,11 @@ TYPED_TEST(BatchMultiVector, ScalesData) } -TYPED_TEST(BatchMultiVector, ScalesDataWithScalar) +TYPED_TEST(MultiVector, ScalesDataWithScalar) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; - auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); + auto alpha = gko::batch::initialize({{2.0}, {-2.0}}, this->exec); auto ualpha = alpha->unbatch(); this->mtx_1->scale(alpha.get()); @@ -164,11 +164,11 @@ TYPED_TEST(BatchMultiVector, ScalesDataWithScalar) } -TYPED_TEST(BatchMultiVector, ScalesDataWithMultipleScalars) +TYPED_TEST(MultiVector, ScalesDataWithMultipleScalars) { using Mtx = typename TestFixture::Mtx; using T = typename 
TestFixture::value_type; - auto alpha = gko::batch_initialize( + auto alpha = gko::batch::initialize( {{{2.0, -2.0, -1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); auto ualpha = alpha->unbatch(); @@ -182,11 +182,11 @@ TYPED_TEST(BatchMultiVector, ScalesDataWithMultipleScalars) } -TYPED_TEST(BatchMultiVector, AddsScaled) +TYPED_TEST(MultiVector, AddsScaled) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; - auto alpha = gko::batch_initialize( + auto alpha = gko::batch::initialize( {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); auto ualpha = alpha->unbatch(); @@ -200,11 +200,11 @@ TYPED_TEST(BatchMultiVector, AddsScaled) } -TYPED_TEST(BatchMultiVector, AddsScaledWithScalar) +TYPED_TEST(MultiVector, AddsScaledWithScalar) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; - auto alpha = gko::batch_initialize({{2.0}, {-2.0}}, this->exec); + auto alpha = gko::batch::initialize({{2.0}, {-2.0}}, this->exec); auto ualpha = alpha->unbatch(); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); @@ -217,10 +217,10 @@ TYPED_TEST(BatchMultiVector, AddsScaledWithScalar) } -TYPED_TEST(BatchMultiVector, AddScaledFailsOnWrongSizes) +TYPED_TEST(MultiVector, AddScaledFailsOnWrongSizes) { using Mtx = typename TestFixture::Mtx; - auto alpha = gko::batch_initialize( + auto alpha = gko::batch::initialize( {{2.0, 3.0, 4.0, 5.0}, {-2.0, 2.0, 4.0, 5.0}}, this->exec); ASSERT_THROW(this->mtx_1->add_scaled(alpha.get(), this->mtx_2.get()), @@ -228,7 +228,7 @@ TYPED_TEST(BatchMultiVector, AddScaledFailsOnWrongSizes) } -TYPED_TEST(BatchMultiVector, ComputesDot) +TYPED_TEST(MultiVector, ComputesDot) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -246,7 +246,7 @@ TYPED_TEST(BatchMultiVector, ComputesDot) } -TYPED_TEST(BatchMultiVector, ComputeDotFailsOnWrongInputSize) +TYPED_TEST(MultiVector, ComputeDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; auto result = @@ -257,7 +257,7 @@ TYPED_TEST(BatchMultiVector, ComputeDotFailsOnWrongInputSize) } -TYPED_TEST(BatchMultiVector, ComputeDotFailsOnWrongResultSize) +TYPED_TEST(MultiVector, ComputeDotFailsOnWrongResultSize) { using Mtx = typename TestFixture::Mtx; @@ -269,7 +269,7 @@ TYPED_TEST(BatchMultiVector, ComputeDotFailsOnWrongResultSize) } -TYPED_TEST(BatchMultiVector, ComputesConjDot) +TYPED_TEST(MultiVector, ComputesConjDot) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; @@ -287,7 +287,7 @@ TYPED_TEST(BatchMultiVector, ComputesConjDot) } -TYPED_TEST(BatchMultiVector, ComputeConjDotFailsOnWrongInputSize) +TYPED_TEST(MultiVector, ComputeConjDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; auto result = @@ -298,7 +298,7 @@ TYPED_TEST(BatchMultiVector, ComputeConjDotFailsOnWrongInputSize) } -TYPED_TEST(BatchMultiVector, ComputeConjDotFailsOnWrongResultSize) +TYPED_TEST(MultiVector, ComputeConjDotFailsOnWrongResultSize) { using Mtx = typename TestFixture::Mtx; @@ -310,13 +310,13 @@ TYPED_TEST(BatchMultiVector, ComputeConjDotFailsOnWrongResultSize) } -TYPED_TEST(BatchMultiVector, ComputesNorm2) +TYPED_TEST(MultiVector, ComputesNorm2) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using T_nc = gko::remove_complex; - using NormVector = gko::BatchMultiVector; - auto mtx(gko::batch_initialize( + using NormVector = gko::batch::MultiVector; + auto mtx(gko::batch::initialize( {{I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}}, {I{-4.0, 2.0}, I{-3.0, -2.0}, I{0.0, 1.0}}}, 
this->exec)); @@ -332,7 +332,7 @@ TYPED_TEST(BatchMultiVector, ComputesNorm2) } -TYPED_TEST(BatchMultiVector, CopiesData) +TYPED_TEST(MultiVector, CopiesData) { gko::kernels::reference::batch_multi_vector::copy( this->exec, this->mtx_0.get(), this->mtx_1.get()); @@ -341,14 +341,14 @@ TYPED_TEST(BatchMultiVector, CopiesData) } -TYPED_TEST(BatchMultiVector, ConvertsToPrecision) +TYPED_TEST(MultiVector, ConvertsToPrecision) { - using BatchMultiVector = typename TestFixture::Mtx; + using MultiVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchMultiVector = typename gko::BatchMultiVector; - auto tmp = OtherBatchMultiVector::create(this->exec); - auto res = BatchMultiVector::create(this->exec); + using OtherMultiVector = typename gko::batch::MultiVector; + auto tmp = OtherMultiVector::create(this->exec); + auto res = MultiVector::create(this->exec); // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? gko::remove_complex{0} @@ -364,14 +364,14 @@ TYPED_TEST(BatchMultiVector, ConvertsToPrecision) } -TYPED_TEST(BatchMultiVector, MovesToPrecision) +TYPED_TEST(MultiVector, MovesToPrecision) { - using BatchMultiVector = typename TestFixture::Mtx; + using MultiVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchMultiVector = typename gko::BatchMultiVector; - auto tmp = OtherBatchMultiVector::create(this->exec); - auto res = BatchMultiVector::create(this->exec); + using OtherMultiVector = typename gko::batch::MultiVector; + auto tmp = OtherMultiVector::create(this->exec); + auto res = MultiVector::create(this->exec); // If OtherT is more precise: 0, otherwise r auto residual = r::value < r::value ? 
gko::remove_complex{0} @@ -387,14 +387,14 @@ TYPED_TEST(BatchMultiVector, MovesToPrecision) } -TYPED_TEST(BatchMultiVector, ConvertsEmptyToPrecision) +TYPED_TEST(MultiVector, ConvertsEmptyToPrecision) { - using BatchMultiVector = typename TestFixture::Mtx; + using MultiVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchMultiVector = typename gko::BatchMultiVector; - auto empty = OtherBatchMultiVector::create(this->exec); - auto res = BatchMultiVector::create(this->exec); + using OtherMultiVector = typename gko::batch::MultiVector; + auto empty = OtherMultiVector::create(this->exec); + auto res = MultiVector::create(this->exec); empty->convert_to(res.get()); @@ -402,14 +402,14 @@ TYPED_TEST(BatchMultiVector, ConvertsEmptyToPrecision) } -TYPED_TEST(BatchMultiVector, MovesEmptyToPrecision) +TYPED_TEST(MultiVector, MovesEmptyToPrecision) { - using BatchMultiVector = typename TestFixture::Mtx; + using MultiVector = typename TestFixture::Mtx; using T = typename TestFixture::value_type; using OtherT = typename gko::next_precision; - using OtherBatchMultiVector = typename gko::BatchMultiVector; - auto empty = OtherBatchMultiVector::create(this->exec); - auto res = BatchMultiVector::create(this->exec); + using OtherMultiVector = typename gko::batch::MultiVector; + auto empty = OtherMultiVector::create(this->exec); + auto res = MultiVector::create(this->exec); empty->move_to(res.get()); diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index a55ff0792ad..abd7b02fd1a 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -50,13 +50,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "test/utils/executor.hpp" -class BatchMultiVector : public CommonTestFixture { +class MultiVector : public CommonTestFixture { protected: - using Mtx = gko::BatchMultiVector; - using NormVector = gko::BatchMultiVector>; - using ComplexMtx = gko::BatchMultiVector>; + using Mtx = gko::batch::MultiVector; + using NormVector = gko::batch::MultiVector>; + using ComplexMtx = gko::batch::MultiVector>; - BatchMultiVector() : rand_engine(15) {} + MultiVector() : rand_engine(15) {} template std::unique_ptr gen_mtx(const size_t num_batch_items, @@ -80,8 +80,8 @@ class BatchMultiVector : public CommonTestFixture { alpha = gen_mtx(batch_size, 1, num_vecs); beta = gen_mtx(batch_size, 1, num_vecs); } else { - alpha = gko::batch_initialize(batch_size, {2.0}, ref); - beta = gko::batch_initialize(batch_size, {-0.5}, ref); + alpha = gko::batch::initialize(batch_size, {2.0}, ref); + beta = gko::batch::initialize(batch_size, {-0.5}, ref); } dx = gko::clone(exec, x); dy = gko::clone(exec, y); @@ -117,7 +117,7 @@ class BatchMultiVector : public CommonTestFixture { }; -TEST_F(BatchMultiVector, SingleVectorAddScaledIsEquivalentToRef) +TEST_F(MultiVector, SingleVectorAddScaledIsEquivalentToRef) { set_up_vector_data(1); @@ -128,7 +128,7 @@ TEST_F(BatchMultiVector, SingleVectorAddScaledIsEquivalentToRef) } -TEST_F(BatchMultiVector, MultipleVectorAddScaledIsEquivalentToRef) +TEST_F(MultiVector, MultipleVectorAddScaledIsEquivalentToRef) { set_up_vector_data(20); @@ -139,7 +139,7 @@ TEST_F(BatchMultiVector, MultipleVectorAddScaledIsEquivalentToRef) } -TEST_F(BatchMultiVector, +TEST_F(MultiVector, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) { set_up_vector_data(20, true); @@ -151,7 +151,7 @@ TEST_F(BatchMultiVector, } -TEST_F(BatchMultiVector, SingleVectorScaleIsEquivalentToRef) +TEST_F(MultiVector, SingleVectorScaleIsEquivalentToRef) { set_up_vector_data(1); @@ -162,7 +162,7 @@ TEST_F(BatchMultiVector, SingleVectorScaleIsEquivalentToRef) } -TEST_F(BatchMultiVector, MultipleVectorScaleIsEquivalentToRef) +TEST_F(MultiVector, MultipleVectorScaleIsEquivalentToRef) { set_up_vector_data(20); @@ -173,7 +173,7 @@ TEST_F(BatchMultiVector, MultipleVectorScaleIsEquivalentToRef) } -TEST_F(BatchMultiVector, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) +TEST_F(MultiVector, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) { set_up_vector_data(20, true); @@ -184,7 +184,7 @@ TEST_F(BatchMultiVector, MultipleVectorScaleWithDifferentAlphaIsEquivalentToRef) } -TEST_F(BatchMultiVector, ComputeNorm2SingleIsEquivalentToRef) +TEST_F(MultiVector, ComputeNorm2SingleIsEquivalentToRef) { set_up_vector_data(1); auto norm_size = @@ -199,7 +199,7 @@ TEST_F(BatchMultiVector, ComputeNorm2SingleIsEquivalentToRef) } -TEST_F(BatchMultiVector, ComputeNorm2IsEquivalentToRef) +TEST_F(MultiVector, ComputeNorm2IsEquivalentToRef) { set_up_vector_data(20); auto norm_size = @@ -214,7 +214,7 @@ TEST_F(BatchMultiVector, ComputeNorm2IsEquivalentToRef) } -TEST_F(BatchMultiVector, ComputeDotIsEquivalentToRef) +TEST_F(MultiVector, ComputeDotIsEquivalentToRef) { set_up_vector_data(20); auto dot_size = @@ -234,7 +234,7 @@ TEST_F(BatchMultiVector, ComputeDotIsEquivalentToRef) } -TEST_F(BatchMultiVector, ComputeDotSingleIsEquivalentToRef) +TEST_F(MultiVector, ComputeDotSingleIsEquivalentToRef) { set_up_vector_data(1); auto dot_size = @@ -249,7 +249,7 @@ TEST_F(BatchMultiVector, ComputeDotSingleIsEquivalentToRef) } -TEST_F(BatchMultiVector, ComputeConjDotIsEquivalentToRef) +TEST_F(MultiVector, ComputeConjDotIsEquivalentToRef) { 
set_up_vector_data(20); auto dot_size = @@ -269,7 +269,7 @@ TEST_F(BatchMultiVector, ComputeConjDotIsEquivalentToRef) } -TEST_F(BatchMultiVector, ComputeConjDotSingleIsEquivalentToRef) +TEST_F(MultiVector, ComputeConjDotSingleIsEquivalentToRef) { set_up_vector_data(1); auto dot_size = @@ -284,7 +284,7 @@ TEST_F(BatchMultiVector, ComputeConjDotSingleIsEquivalentToRef) } -TEST_F(BatchMultiVector, CopySingleIsEquivalentToRef) +TEST_F(MultiVector, CopySingleIsEquivalentToRef) { set_up_vector_data(1); @@ -297,7 +297,7 @@ TEST_F(BatchMultiVector, CopySingleIsEquivalentToRef) } -TEST_F(BatchMultiVector, CopyIsEquivalentToRef) +TEST_F(MultiVector, CopyIsEquivalentToRef) { set_up_vector_data(20); diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index ed62e3ca3d3..d2c273b4e0f 100644 --- a/test/test_install/test_install.cpp +++ b/test/test_install/test_install.cpp @@ -219,7 +219,7 @@ int main() // core/base/batch_multi_vector.hpp { using type1 = float; - using batch_multi_vector_type = gko::BatchMultiVector; + using batch_multi_vector_type = gko::batch::MultiVector; auto test = batch_multi_vector_type::create(exec); } From 3cc3925acbc4eebc5296f754d986c165896edda2 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 1 Aug 2023 14:57:30 +0200 Subject: [PATCH 145/583] Rename to extract_batch_item --- .../base/batch_multi_vector_kernels.hpp.inc | 24 +++++----- core/base/batch_struct.hpp | 13 +++--- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 45 ++++++++++--------- omp/base/batch_multi_vector_kernels.cpp | 30 ++++++------- reference/base/batch_multi_vector_kernels.cpp | 30 ++++++------- 5 files changed, 74 insertions(+), 68 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index df64e5cfe85..19c3c330f45 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -59,8 +59,8 @@ __global__ __launch_bounds__( { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; batch_id += gridDim.x) { - const auto alpha_b = gko::batch::batch_item(alpha, batch_id); - const auto x_b = gko::batch::batch_item(x, batch_id); + const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); scale(alpha_b, x_b, map); } } @@ -100,9 +100,9 @@ __global__ __launch_bounds__( { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; batch_id += gridDim.x) { - const auto alpha_b = gko::batch::batch_item(alpha, batch_id); - const auto x_b = gko::batch::batch_item(x, batch_id); - const auto y_b = gko::batch::batch_item(y, batch_id); + const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + const auto y_b = gko::batch::extract_batch_item(y, batch_id); add_scaled(alpha_b, x_b, y_b, map); } } @@ -162,9 +162,9 @@ __global__ { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; batch_id += gridDim.x) { - const auto x_b = gko::batch::batch_item(x, batch_id); - const auto y_b = gko::batch::batch_item(y, batch_id); - const auto r_b = gko::batch::batch_item(result, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + const auto y_b = gko::batch::extract_batch_item(y, batch_id); + const auto r_b = gko::batch::extract_batch_item(result, batch_id); compute_gen_dot_product(x_b, y_b, r_b, map); } } @@ -236,8 +236,8 @@ __global__ 
__launch_bounds__( { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; batch_id += gridDim.x) { - const auto x_b = gko::batch::batch_item(x, batch_id); - const auto r_b = gko::batch::batch_item(result, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + const auto r_b = gko::batch::extract_batch_item(result, batch_id); compute_norm2(x_b, r_b); } } @@ -271,8 +271,8 @@ __global__ { for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_items; batch_id += gridDim.x) { - const auto dst_b = gko::batch::batch_item(dst, batch_id); - const auto src_b = gko::batch::batch_item(src, batch_id); + const auto dst_b = gko::batch::extract_batch_item(dst, batch_id); + const auto src_b = gko::batch::extract_batch_item(src, batch_id); copy(src_b, dst_b); } } diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index d22b64f3320..caca4577cf7 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -107,18 +107,19 @@ GKO_ATTRIBUTES GKO_INLINE multi_vector::uniform_batch to_const( * @param batch_idx The position of the desired object in the batch */ template -GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item batch_item( - const multi_vector::uniform_batch& batch, - const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item +extract_batch_item(const multi_vector::uniform_batch& batch, + const size_type batch_idx) { return {batch.values + batch_idx * batch.stride * batch.num_rows, batch.stride, batch.num_rows, batch.num_rhs}; } template -GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item batch_item( - ValueType* const batch_values, const int stride, const int num_rows, - const int num_rhs, const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item +extract_batch_item(ValueType* const batch_values, const int stride, + const int num_rows, const int num_rhs, + const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, num_rhs}; diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index f307b6ba240..5c52ab5a50f 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -91,8 +91,9 @@ void scale(std::shared_ptr exec, sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto alpha_b = batch::batch_item(alpha_ub, group_id); - const auto x_b = batch::batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); scale_kernel(alpha_b, x_b, item_ct1, [](int col) { return 0; }); }); @@ -103,8 +104,9 @@ void scale(std::shared_ptr exec, sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto alpha_b = batch::batch_item(alpha_ub, group_id); - const auto x_b = batch::batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); scale_kernel(alpha_b, x_b, item_ct1, [](int col) { return col; }); }); @@ -141,9 +143,10 @@ void add_scaled(std::shared_ptr exec, sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto alpha_b = batch::batch_item(alpha_ub, group_id); - const auto x_b = 
batch::batch_item(x_ub, group_id); - const auto y_b = batch::batch_item(y_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto y_b = batch::extract_batch_item(y_ub, group_id); add_scaled_kernel(alpha_b, x_b, y_b, item_ct1, [](auto col) { return 0; }); }); @@ -154,9 +157,10 @@ void add_scaled(std::shared_ptr exec, sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto alpha_b = batch::batch_item(alpha_ub, group_id); - const auto x_b = batch::batch_item(x_ub, group_id); - const auto y_b = batch::batch_item(y_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto y_b = batch::extract_batch_item(y_ub, group_id); add_scaled_kernel(alpha_b, x_b, y_b, item_ct1, [](auto col) { return col; }); }); @@ -194,9 +198,9 @@ void compute_dot(std::shared_ptr exec, config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_item(x_ub, group_id); - const auto y_b = batch::batch_item(y_ub, group_id); - const auto res_b = batch::batch_item(res_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto y_b = batch::extract_batch_item(y_ub, group_id); + const auto res_b = batch::extract_batch_item(res_ub, group_id); compute_gen_dot_product_kernel(x_b, y_b, res_b, item_ct1, [](auto val) { return val; }); }); @@ -232,9 +236,9 @@ void compute_conj_dot(std::shared_ptr exec, config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_item(x_ub, group_id); - const auto y_b = batch::batch_item(y_ub, group_id); - const auto res_b = batch::batch_item(res_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto y_b = batch::extract_batch_item(y_ub, group_id); + const auto res_b = batch::extract_batch_item(res_ub, group_id); compute_gen_dot_product_kernel( x_b, y_b, res_b, item_ct1, [](auto val) { return conj(val); }); @@ -269,8 +273,8 @@ void compute_norm2(std::shared_ptr exec, config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_item(x_ub, group_id); - const auto res_b = batch::batch_item(res_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto res_b = batch::extract_batch_item(res_ub, group_id); compute_norm2_kernel(x_b, res_b, item_ct1); }); }); @@ -301,8 +305,9 @@ void copy(std::shared_ptr exec, sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); - const auto x_b = batch::batch_item(x_ub, group_id); - const auto result_b = batch::batch_item(result_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto result_b = + batch::extract_batch_item(result_ub, group_id); copy_kernel(x_b, result_b, item_ct1); }); }); diff --git a/omp/base/batch_multi_vector_kernels.cpp b/omp/base/batch_multi_vector_kernels.cpp index deef105db0d..6067e762c98 100644 --- a/omp/base/batch_multi_vector_kernels.cpp +++ b/omp/base/batch_multi_vector_kernels.cpp @@ -68,8 +68,8 @@ void scale(std::shared_ptr exec, const auto alpha_ub = host::get_batch_struct(alpha); #pragma omp parallel for for 
(size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto alpha_b = gko::batch::batch_item(alpha_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto alpha_b = gko::batch::extract_batch_item(alpha_ub, batch); + const auto x_b = gko::batch::extract_batch_item(x_ub, batch); scale_kernel(alpha_b, x_b); } } @@ -89,9 +89,9 @@ void add_scaled(std::shared_ptr exec, const auto alpha_ub = host::get_batch_struct(alpha); #pragma omp parallel for for (size_type batch = 0; batch < y->get_num_batch_items(); ++batch) { - const auto alpha_b = gko::batch::batch_item(alpha_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); - const auto y_b = gko::batch::batch_item(y_ub, batch); + const auto alpha_b = gko::batch::extract_batch_item(alpha_ub, batch); + const auto x_b = gko::batch::extract_batch_item(x_ub, batch); + const auto y_b = gko::batch::extract_batch_item(y_ub, batch); add_scaled_kernel(alpha_b, x_b, y_b); } } @@ -111,9 +111,9 @@ void compute_dot(std::shared_ptr exec, const auto res_ub = host::get_batch_struct(result); #pragma omp parallel for for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { - const auto res_b = gko::batch::batch_item(res_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); - const auto y_b = gko::batch::batch_item(y_ub, batch); + const auto res_b = gko::batch::extract_batch_item(res_ub, batch); + const auto x_b = gko::batch::extract_batch_item(x_ub, batch); + const auto y_b = gko::batch::extract_batch_item(y_ub, batch); compute_dot_product_kernel(x_b, y_b, res_b); } } @@ -133,9 +133,9 @@ void compute_conj_dot(std::shared_ptr exec, const auto res_ub = host::get_batch_struct(result); #pragma omp parallel for for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { - const auto res_b = gko::batch::batch_item(res_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); - const auto y_b = gko::batch::batch_item(y_ub, batch); + const auto res_b = gko::batch::extract_batch_item(res_ub, batch); + const auto x_b = gko::batch::extract_batch_item(x_ub, batch); + const auto y_b = gko::batch::extract_batch_item(y_ub, batch); compute_conj_dot_product_kernel(x_b, y_b, res_b); } } @@ -153,8 +153,8 @@ void compute_norm2(std::shared_ptr exec, const auto res_ub = host::get_batch_struct(result); #pragma omp parallel for for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { - const auto res_b = gko::batch::batch_item(res_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto res_b = gko::batch::extract_batch_item(res_ub, batch); + const auto x_b = gko::batch::extract_batch_item(x_ub, batch); compute_norm2_kernel(x_b, res_b); } } @@ -172,8 +172,8 @@ void copy(std::shared_ptr exec, const auto result_ub = host::get_batch_struct(result); #pragma omp parallel for for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto result_b = gko::batch::batch_item(result_ub, batch); - const auto x_b = gko::batch::batch_item(x_ub, batch); + const auto result_b = gko::batch::extract_batch_item(result_ub, batch); + const auto x_b = gko::batch::extract_batch_item(x_ub, batch); copy_kernel(x_b, result_b); } } diff --git a/reference/base/batch_multi_vector_kernels.cpp b/reference/base/batch_multi_vector_kernels.cpp index 076fd87778d..89476e61453 100644 --- a/reference/base/batch_multi_vector_kernels.cpp +++ b/reference/base/batch_multi_vector_kernels.cpp @@ -67,8 +67,8 @@ void scale(std::shared_ptr exec, const auto 
x_ub = host::get_batch_struct(x); const auto alpha_ub = host::get_batch_struct(alpha); for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto alpha_b = batch::batch_item(alpha_ub, batch); - const auto x_b = batch::batch_item(x_ub, batch); + const auto alpha_b = batch::extract_batch_item(alpha_ub, batch); + const auto x_b = batch::extract_batch_item(x_ub, batch); scale_kernel(alpha_b, x_b); } } @@ -87,9 +87,9 @@ void add_scaled(std::shared_ptr exec, const auto y_ub = host::get_batch_struct(y); const auto alpha_ub = host::get_batch_struct(alpha); for (size_type batch = 0; batch < y->get_num_batch_items(); ++batch) { - const auto alpha_b = batch::batch_item(alpha_ub, batch); - const auto x_b = batch::batch_item(x_ub, batch); - const auto y_b = batch::batch_item(y_ub, batch); + const auto alpha_b = batch::extract_batch_item(alpha_ub, batch); + const auto x_b = batch::extract_batch_item(x_ub, batch); + const auto y_b = batch::extract_batch_item(y_ub, batch); add_scaled_kernel(alpha_b, x_b, y_b); } } @@ -108,9 +108,9 @@ void compute_dot(std::shared_ptr exec, const auto y_ub = host::get_batch_struct(y); const auto res_ub = host::get_batch_struct(result); for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { - const auto res_b = batch::batch_item(res_ub, batch); - const auto x_b = batch::batch_item(x_ub, batch); - const auto y_b = batch::batch_item(y_ub, batch); + const auto res_b = batch::extract_batch_item(res_ub, batch); + const auto x_b = batch::extract_batch_item(x_ub, batch); + const auto y_b = batch::extract_batch_item(y_ub, batch); compute_dot_product_kernel(x_b, y_b, res_b); } } @@ -129,9 +129,9 @@ void compute_conj_dot(std::shared_ptr exec, const auto y_ub = host::get_batch_struct(y); const auto res_ub = host::get_batch_struct(result); for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { - const auto res_b = batch::batch_item(res_ub, batch); - const auto x_b = batch::batch_item(x_ub, batch); - const auto y_b = batch::batch_item(y_ub, batch); + const auto res_b = batch::extract_batch_item(res_ub, batch); + const auto x_b = batch::extract_batch_item(x_ub, batch); + const auto y_b = batch::extract_batch_item(y_ub, batch); compute_conj_dot_product_kernel(x_b, y_b, res_b); } } @@ -148,8 +148,8 @@ void compute_norm2(std::shared_ptr exec, const auto x_ub = host::get_batch_struct(x); const auto res_ub = host::get_batch_struct(result); for (size_type batch = 0; batch < result->get_num_batch_items(); ++batch) { - const auto res_b = batch::batch_item(res_ub, batch); - const auto x_b = batch::batch_item(x_ub, batch); + const auto res_b = batch::extract_batch_item(res_ub, batch); + const auto x_b = batch::extract_batch_item(x_ub, batch); compute_norm2_kernel(x_b, res_b); } } @@ -166,8 +166,8 @@ void copy(std::shared_ptr exec, const auto x_ub = host::get_batch_struct(x); const auto result_ub = host::get_batch_struct(result); for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto result_b = batch::batch_item(result_ub, batch); - const auto x_b = batch::batch_item(x_ub, batch); + const auto result_b = batch::extract_batch_item(result_ub, batch); + const auto x_b = batch::extract_batch_item(x_ub, batch); copy_kernel(x_b, result_b); } } From f41b0df4797363f9b7e4727b112b6c89920a7c2e Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Tue, 1 Aug 2023 13:00:43 +0000 Subject: [PATCH 146/583] Format files Co-authored-by: Pratik Nayak --- .../base/batch_multi_vector_kernels.hpp.inc | 29 +++++------ 
dpcpp/base/batch_multi_vector_kernels.dp.cpp | 52 ++++++++++--------- test/base/batch_multi_vector_kernels.cpp | 7 ++- 3 files changed, 42 insertions(+), 46 deletions(-) diff --git a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc index 19c3c330f45..9f77598ff5a 100644 --- a/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc +++ b/common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc @@ -47,15 +47,10 @@ __device__ __forceinline__ void scale( } template -__global__ __launch_bounds__( - default_block_size, - sm_oversubscription) void scale_kernel(const gko::batch::multi_vector:: - uniform_batch - alpha, - const gko::batch::multi_vector:: - uniform_batch - x, - Mapping map) +__global__ +__launch_bounds__(default_block_size, sm_oversubscription) void scale_kernel( + const gko::batch::multi_vector::uniform_batch alpha, + const gko::batch::multi_vector::uniform_batch x, Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; batch_id += gridDim.x) { @@ -154,11 +149,11 @@ __device__ __forceinline__ void compute_gen_dot_product( template __global__ - __launch_bounds__(default_block_size, sm_oversubscription) void compute_gen_dot_product_kernel( - const gko::batch::multi_vector::uniform_batch x, - const gko::batch::multi_vector::uniform_batch y, - const gko::batch::multi_vector::uniform_batch result, - Mapping map) +__launch_bounds__(default_block_size, sm_oversubscription) void compute_gen_dot_product_kernel( + const gko::batch::multi_vector::uniform_batch x, + const gko::batch::multi_vector::uniform_batch y, + const gko::batch::multi_vector::uniform_batch result, + Mapping map) { for (size_type batch_id = blockIdx.x; batch_id < x.num_batch_items; batch_id += gridDim.x) { @@ -265,9 +260,9 @@ __device__ __forceinline__ void copy( template __global__ - __launch_bounds__(default_block_size, sm_oversubscription) void copy_kernel( - const gko::batch::multi_vector::uniform_batch src, - const gko::batch::multi_vector::uniform_batch dst) +__launch_bounds__(default_block_size, sm_oversubscription) void copy_kernel( + const gko::batch::multi_vector::uniform_batch src, + const gko::batch::multi_vector::uniform_batch dst) { for (size_type batch_id = blockIdx.x; batch_id < src.num_batch_items; batch_id += gridDim.x) { diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 5c52ab5a50f..10e47ba080e 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -193,9 +193,9 @@ void compute_dot(std::shared_ptr exec, // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto x_b = batch::extract_batch_item(x_ub, group_id); @@ -231,18 +231,19 @@ void compute_conj_dot(std::shared_ptr exec, exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - 
const auto y_b = batch::extract_batch_item(y_ub, group_id); - const auto res_b = batch::extract_batch_item(res_ub, group_id); - compute_gen_dot_product_kernel( - x_b, y_b, res_b, item_ct1, - [](auto val) { return conj(val); }); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto y_b = batch::extract_batch_item(y_ub, group_id); + const auto res_b = + batch::extract_batch_item(res_ub, group_id); + compute_gen_dot_product_kernel( + x_b, y_b, res_b, item_ct1, + [](auto val) { return conj(val); }); + }); }); } @@ -267,16 +268,17 @@ void compute_norm2(std::shared_ptr exec, const dim3 grid(num_batches); exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto res_b = batch::extract_batch_item(res_ub, group_id); - compute_norm2_kernel(x_b, res_b, item_ct1); - }); + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = + batch::extract_batch_item(x_ub, group_id); + const auto res_b = batch::extract_batch_item( + res_ub, group_id); + compute_norm2_kernel(x_b, res_b, item_ct1); + }); }); } diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index abd7b02fd1a..2d0c79d0664 100644 --- a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -59,8 +59,8 @@ class MultiVector : public CommonTestFixture { MultiVector() : rand_engine(15) {} template - std::unique_ptr gen_mtx(const size_t num_batch_items, - int num_rows, int num_cols) + std::unique_ptr gen_mtx(const size_t num_batch_items, int num_rows, + int num_cols) { return gko::test::generate_uniform_batch_random_matrix( num_batch_items, num_rows, num_cols, @@ -139,8 +139,7 @@ TEST_F(MultiVector, MultipleVectorAddScaledIsEquivalentToRef) } -TEST_F(MultiVector, - MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) +TEST_F(MultiVector, MultipleVectorAddScaledWithDifferentAlphaIsEquivalentToRef) { set_up_vector_data(20, true); From 9f150ba084c068d459f4c27e84718ab158e79852 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 2 Aug 2023 00:41:59 +0200 Subject: [PATCH 147/583] Add Dense matrix view creation --- core/base/batch_multi_vector.cpp | 73 +++++++++++-------- core/test/base/batch_multi_vector.cpp | 12 ++- .../ginkgo/core/base/batch_multi_vector.hpp | 22 +++++- 3 files changed, 74 insertions(+), 33 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index f17f1479f5f..88a203300de 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -85,6 +85,38 @@ batch_dim<2> compute_batch_size( } // namespace detail +template +std::unique_ptr> +MultiVector::create_view_for_item(size_type item_id) +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create( + exec, this->get_common_size(), + make_array_view(exec, 
num_rows * stride, + this->get_values_for_item(item_id)), + stride); + return mat; +} + + +template +std::unique_ptr> +MultiVector::create_const_view_for_item(size_type item_id) const +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create_const( + exec, this->get_common_size(), + make_const_array_view(exec, num_rows * stride, + this->get_const_values_for_item(item_id)), + stride); + return mat; +} + + template MultiVector::MultiVector(std::shared_ptr exec, const batch_dim<2>& size) @@ -164,18 +196,13 @@ template std::vector>> MultiVector::unbatch() const { - using unbatch_type = matrix::Dense; auto exec = this->get_executor(); - auto unbatch_mats = std::vector>{}; + auto unbatched_mats = std::vector>{}; for (size_type b = 0; b < this->get_num_batch_items(); ++b) { - auto mat = unbatch_type::create(exec, this->get_common_size()); - exec->copy_from(exec.get(), mat->get_num_stored_elements(), - this->get_const_values() + - this->get_size().get_cumulative_offset(b), - mat->get_values()); - unbatch_mats.emplace_back(std::move(mat)); + unbatched_mats.emplace_back( + this->create_const_view_for_item(b)->clone()); } - return unbatch_mats; + return unbatched_mats; } @@ -336,19 +363,15 @@ void read_impl(MatrixType* mtx, const std::vector& data) GKO_THROW_IF_INVALID(data.size() > 0, "Input data is empty"); auto common_size = data[0].size; - auto batch_size = batch_dim<2>(data.size(), common_size); - for (const auto& b : data) { - auto b_size = b.size; - GKO_ASSERT_EQUAL_DIMENSIONS(common_size, b_size); - } + auto num_batch_items = data.size(); + auto batch_size = batch_dim<2>(num_batch_items, common_size); auto tmp = MatrixType::create(mtx->get_executor()->get_master(), batch_size); - tmp->fill(zero()); - for (size_type b = 0; b < data.size(); ++b) { - for (const auto& elem : data[b].nonzeros) { - tmp->at(b, elem.row, elem.column) = elem.value; - } + for (size_type b = 0; b < num_batch_items; ++b) { + assert(data[b].size == common_size); + tmp->create_view_for_item(b)->read(data[b]); } + tmp->move_to(mtx); } @@ -370,20 +393,10 @@ void MultiVector::read(const std::vector& data) template void write_impl(const MatrixType* mtx, std::vector& data) { - auto tmp = make_temporary_clone(mtx->get_executor()->get_master(), mtx); - data = std::vector(mtx->get_num_batch_items()); for (size_type b = 0; b < mtx->get_num_batch_items(); ++b) { data[b] = {mtx->get_common_size(), {}}; - for (size_type row = 0; row < data[b].size[0]; ++row) { - for (size_type col = 0; col < data[b].size[1]; ++col) { - if (tmp->at(b, row, col) != - zero()) { - data[b].nonzeros.emplace_back(row, col, - tmp->at(b, row, col)); - } - } - } + mtx->create_const_view_for_item(b)->write(data[b]); } } diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index e87cedca913..055c2b899d0 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -55,7 +55,9 @@ class MultiVector : public ::testing::Test { mtx(gko::batch::initialize>( {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, - exec)) + exec)), + dense_mtx(gko::initialize>( + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)) {} @@ -89,6 +91,7 @@ class MultiVector : public ::testing::Test { std::shared_ptr exec; std::unique_ptr> mtx; + std::unique_ptr> dense_mtx; }; TYPED_TEST_SUITE(MultiVector, gko::test::ValueTypes); @@ -118,6 +121,13 @@ TYPED_TEST(MultiVector, CanGetValuesForEntry) } 
+TYPED_TEST(MultiVector, CanCreateDenseItemView) +{ + GKO_ASSERT_MTX_NEAR(this->mtx->create_view_for_item(1), this->dense_mtx, + 0.0); +} + + TYPED_TEST(MultiVector, CanBeCopied) { auto mtx_copy = gko::batch::MultiVector::create(this->exec); diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 0e011f6b3ef..77171569320 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -130,6 +130,24 @@ class MultiVector void write(std::vector& data) const override; + /** + * Creates a mutable view (of matrix::Dense type) of one item of the Batch + * MultiVector object. Does not perform any deep copies, but only returns a + * view of the data. + * + * @param item_id The index of the batch item + * + * @return a matrix::Dense object with the data from the batch item at the + * given index. + */ + std::unique_ptr create_view_for_item(size_type item_id); + + /** + * @copydoc create_view_for_item(size_type) + */ + std::unique_ptr create_const_view_for_item( + size_type item_id) const; + /** * Unbatches the batched multi-vector and creates a std::vector of Dense * matrices @@ -208,8 +226,8 @@ class MultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + From 93e401b11bc1ab105a8c865ba8e7885d1f5c6288 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 2 Aug 2023 17:24:47 +0200 Subject: [PATCH 148/583] Move read/write/unbatch to Ginkgo internal --- core/base/batch_multi_vector.cpp | 125 ------------- core/base/batch_utilities.hpp | 167 ++++++++++++++++++ core/test/base/batch_multi_vector.cpp | 67 ++++--- core/test/utils/assertions.hpp | 13 +- core/test/utils/batch_helpers.hpp | 76 ++------ .../ginkgo/core/base/batch_lin_op_helpers.hpp | 109 ------------ .../ginkgo/core/base/batch_multi_vector.hpp | 75 +------- include/ginkgo/ginkgo.hpp | 1 - .../test/base/batch_multi_vector_kernels.cpp | 38 ++-- test/base/batch_multi_vector_kernels.cpp | 10 +- 10 files changed, 248 insertions(+), 433 deletions(-) create mode 100644 core/base/batch_utilities.hpp delete mode 100644 include/ginkgo/core/base/batch_lin_op_helpers.hpp diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 88a203300de..23591cd1ffe 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -126,59 +126,6 @@ MultiVector::MultiVector(std::shared_ptr exec, {} -template -MultiVector::MultiVector( - std::shared_ptr exec, - const std::vector*>& matrices) - : EnablePolymorphicObject>(exec), - batch_size_{detail::compute_batch_size(matrices)}, - values_(exec, compute_num_elems(batch_size_)) -{ - for (size_type i = 0; i < this->get_num_batch_items(); ++i) { - auto local_exec = matrices[i]->get_executor(); - exec->copy_from( - local_exec.get(), matrices[i]->get_num_stored_elements(), - matrices[i]->get_const_values(), - this->get_values() + this->get_size().get_cumulative_offset(i)); - } -} - - -template -MultiVector::MultiVector(std::shared_ptr exec, - size_type num_duplications, - const matrix::Dense* input) - : MultiVector(exec, - batch_dim<2>(num_duplications, input->get_size())) -{ - size_type offset = 0; - for (size_type i = 0; i < num_duplications; ++i) { - 
exec->copy_from(input->get_executor().get(), - input->get_num_stored_elements(), - input->get_const_values(), this->get_values() + offset); - offset += input->get_num_stored_elements(); - } -} - - -template -MultiVector::MultiVector(std::shared_ptr exec, - size_type num_duplications, - const MultiVector* input) - : MultiVector( - exec, batch_dim<2>(input->get_num_batch_items() * num_duplications, - input->get_common_size())) -{ - size_type offset = 0; - for (size_type i = 0; i < num_duplications; ++i) { - exec->copy_from(input->get_executor().get(), - input->get_num_stored_elements(), - input->get_const_values(), this->get_values() + offset); - offset += input->get_num_stored_elements(); - } -} - - template std::unique_ptr> MultiVector::create_with_config_of( @@ -192,20 +139,6 @@ MultiVector::create_with_config_of( } -template -std::vector>> -MultiVector::unbatch() const -{ - auto exec = this->get_executor(); - auto unbatched_mats = std::vector>{}; - for (size_type b = 0; b < this->get_num_batch_items(); ++b) { - unbatched_mats.emplace_back( - this->create_const_view_for_item(b)->clone()); - } - return unbatched_mats; -} - - template std::unique_ptr> MultiVector::create_const( @@ -357,64 +290,6 @@ void MultiVector::move_to( } -template -void read_impl(MatrixType* mtx, const std::vector& data) -{ - GKO_THROW_IF_INVALID(data.size() > 0, "Input data is empty"); - - auto common_size = data[0].size; - auto num_batch_items = data.size(); - auto batch_size = batch_dim<2>(num_batch_items, common_size); - auto tmp = - MatrixType::create(mtx->get_executor()->get_master(), batch_size); - for (size_type b = 0; b < num_batch_items; ++b) { - assert(data[b].size == common_size); - tmp->create_view_for_item(b)->read(data[b]); - } - - tmp->move_to(mtx); -} - - -template -void MultiVector::read(const std::vector& data) -{ - read_impl(this, data); -} - - -template -void MultiVector::read(const std::vector& data) -{ - read_impl(this, data); -} - - -template -void write_impl(const MatrixType* mtx, std::vector& data) -{ - data = std::vector(mtx->get_num_batch_items()); - for (size_type b = 0; b < mtx->get_num_batch_items(); ++b) { - data[b] = {mtx->get_common_size(), {}}; - mtx->create_const_view_for_item(b)->write(data[b]); - } -} - - -template -void MultiVector::write(std::vector& data) const -{ - write_impl(this, data); -} - - -template -void MultiVector::write(std::vector& data) const -{ - write_impl(this, data); -} - - #define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class MultiVector<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR); diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp new file mode 100644 index 00000000000..f7f28a616d8 --- /dev/null +++ b/core/base/batch_utilities.hpp @@ -0,0 +1,167 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_BASE_BATCH_UTILITIES_HPP_ +#define GKO_CORE_BASE_BATCH_UTILITIES_HPP_ + + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace batch { +namespace multivector { + + +template +std::unique_ptr> duplicate( + std::shared_ptr exec, size_type num_duplications, + const batch::MultiVector* input) +{ + auto num_batch_items = input->get_num_batch_items(); + auto tmp = batch::MultiVector::create( + exec, batch_dim<2>(num_batch_items * num_duplications, + input->get_common_size())); + + for (size_type i = 0; i < num_duplications; ++i) { + for (size_type b = 0; b < num_batch_items; ++b) { + tmp->create_view_for_item(i * num_batch_items + b) + ->copy_from(input->create_const_view_for_item(b).get()); + } + } + + return std::move(tmp); +} + + +template +std::unique_ptr> create_from_dense( + std::shared_ptr exec, const size_type num_duplications, + const matrix::Dense* input) +{ + auto num_batch_items = num_duplications; + auto tmp = batch::MultiVector::create( + exec, batch_dim<2>(num_batch_items, input->get_size())); + + for (size_type b = 0; b < num_batch_items; ++b) { + tmp->create_view_for_item(b)->copy_from(input); + } + + return std::move(tmp); +} + + +template +std::unique_ptr> create_from_dense( + std::shared_ptr exec, + const std::vector*>& input) +{ + auto num_batch_items = input.size(); + auto tmp = batch::MultiVector::create( + exec, batch_dim<2>(num_batch_items, input[0]->get_size())); + + for (size_type b = 0; b < num_batch_items; ++b) { + tmp->create_view_for_item(b)->copy_from(input[b]); + } + + return std::move(tmp); +} + + +template +std::vector>> unbatch( + const batch::MultiVector* batch_multivec) +{ + auto exec = batch_multivec->get_executor(); + auto unbatched_mats = + std::vector>>{}; + for (size_type b = 0; b < batch_multivec->get_num_batch_items(); ++b) { + unbatched_mats.emplace_back( + batch_multivec->create_const_view_for_item(b)->clone()); + } + return unbatched_mats; +} + + +template +std::unique_ptr> read( + std::shared_ptr exec, + const std::vector>& data) +{ + auto num_batch_items = data.size(); + auto tmp = MultiVector::create( + exec, batch_dim<2>(num_batch_items, data[0].size)); + + for (size_type b = 0; b < num_batch_items; ++b) { + tmp->create_view_for_item(b)->read(data[b]); + } + + return std::move(tmp); +} + + +template +std::vector> write( + const MultiVector* mvec) +{ + auto data = std::vector>( + mvec->get_num_batch_items()); + + for (size_type 
b = 0; b < mvec->get_num_batch_items(); ++b) { + data[b] = {mvec->get_common_size(), {}}; + mvec->create_const_view_for_item(b)->write(data[b]); + } + + return data; +} + + +} // namespace multivector +} // namespace batch +} // namespace gko + + +#endif // GKO_CORE_BASE_BATCH_UTILITIES_HPP_ diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 055c2b899d0..85168a406cc 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -41,7 +41,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/base/batch_utilities.hpp" #include "core/test/utils.hpp" +#include "core/test/utils/batch_helpers.hpp" template @@ -250,7 +252,7 @@ TYPED_TEST(MultiVector, CanBeConstructedFromDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::MultiVector::create( + auto m = gko::batch::multivector::create_from_dense( this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); @@ -267,16 +269,16 @@ TYPED_TEST(MultiVector, CanBeConstructedFromDenseMatricesByDuplication) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::batch::MultiVector::create( + auto bat_m = gko::batch::multivector::create_from_dense( this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); auto m = - gko::batch::MultiVector::create(this->exec, 3, mat1.get()); + gko::batch::multivector::create_from_dense(this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } -TYPED_TEST(MultiVector, CanBeConstructedFromMultiVectorMatrices) +TYPED_TEST(MultiVector, CanBeConstructedByDuplicatingMultiVectors) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -285,14 +287,14 @@ TYPED_TEST(MultiVector, CanBeConstructedFromMultiVectorMatrices) this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::MultiVector::create( + auto m = gko::batch::multivector::create_from_dense( this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::batch::MultiVector::create( + auto m_ref = gko::batch::multivector::create_from_dense( this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), mat1.get(), mat2.get()}); auto m2 = - gko::batch::MultiVector::create(this->exec, 3, m.get()); + gko::batch::multivector::duplicate(this->exec, 3, m.get()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } @@ -383,7 +385,7 @@ TYPED_TEST(MultiVector, CanBeUnbatchedIntoDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto dense_mats = this->mtx->unbatch(); + auto dense_mats = gko::batch::multivector::unbatch(this->mtx.get()); ASSERT_EQ(dense_mats.size(), 2); GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); @@ -394,22 +396,19 @@ TYPED_TEST(MultiVector, CanBeUnbatchedIntoDenseMatrices) TYPED_TEST(MultiVector, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; - auto m = gko::batch::MultiVector::create(this->exec); - // clang-format off - m->read({gko::matrix_data{{2, 2}, - {{0, 0, 1.0}, - {0, 1, 3.0}, - {1, 0, 0.0}, - {1, 1, 5.0}}}, - gko::matrix_data{{2, 2}, - {{0, 0, -1.0}, - {0, 1, 0.5}, - {1, 0, 0.0}, - {1, 1, 9.0}}}}); - // clang-format on + using index_type = int; - ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + auto vec_data = std::vector>{}; + 
vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 0, 0.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); + + auto m = gko::batch::multivector::read(this->exec, + vec_data); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); @@ -423,18 +422,15 @@ TYPED_TEST(MultiVector, CanBeReadFromMatrixData) TYPED_TEST(MultiVector, CanBeReadFromSparseMatrixData) { using value_type = typename TestFixture::value_type; - auto m = gko::batch::MultiVector::create(this->exec); + using index_type = int; + auto vec_data = std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); - // clang-format off - m->read({gko::matrix_data{{2, 2}, - {{0, 0, 1.0}, - {0, 1, 3.0}, - {1, 1, 5.0}}}, - gko::matrix_data{{2, 2}, - {{0, 0, -1.0}, - {0, 1, 0.5}, - {1, 1, 9.0}}}}); - // clang-format on + auto m = gko::batch::multivector::read(this->exec, + vec_data); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); @@ -451,10 +447,11 @@ TYPED_TEST(MultiVector, CanBeReadFromSparseMatrixData) TYPED_TEST(MultiVector, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; + using index_type = int; using tpl = typename gko::matrix_data::nonzero_type; - std::vector> data; - this->mtx->write(data); + auto data = + gko::batch::multivector::write(this->mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); ASSERT_EQ(data[0].nonzeros.size(), 6); diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index bae78912a6c..153907cf2cf 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -55,6 +55,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/base/batch_utilities.hpp" #include "core/base/extended_float.hpp" @@ -714,15 +715,11 @@ ::testing::AssertionResult batch_matrices_near( const Mat2* second, double tolerance) { auto exec = first->get_executor()->get_master(); - std::vector< - matrix_data> - first_data; - std::vector< - matrix_data> - second_data; + using value_type1 = typename Mat1::value_type; + using value_type2 = typename Mat2::value_type; - first->write(first_data); - second->write(second_data); + auto first_data = gko::batch::multivector::write(first); + auto second_data = gko::batch::multivector::write(second); if (first_data.size() != second_data.size()) { return ::testing::AssertionFailure() diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp index 3b9e673922e..4cf9d4973e2 100644 --- a/core/test/utils/batch_helpers.hpp +++ b/core/test/utils/batch_helpers.hpp @@ -35,13 +35,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include +#include #include "core/test/utils/assertions.hpp" +#include "core/test/utils/matrix_generator.hpp" namespace gko { @@ -68,72 +71,27 @@ std::vector> share(std::vector>&& objs) */ template -std::unique_ptr generate_uniform_batch_random_matrix( - const size_type batch_size, const size_type num_rows, +std::unique_ptr generate_random_batch_matrix( + const size_type num_batch_items, const size_type num_rows, const size_type num_cols, NonzeroDistribution&& nonzero_dist, ValueDistribution&& value_dist, Engine&& engine, - const bool with_all_diagonals, std::shared_ptr exec, - MatrixArgs&&... args) + std::shared_ptr exec, MatrixArgs&&... args) { using value_type = typename MatrixType::value_type; using index_type = typename MatrixType::index_type; - - // generate sparsity pattern - matrix_data in_data{gko::dim<2>{num_rows, num_cols}, - {}}; - - for (size_type row = 0; row < num_rows; ++row) { - // randomly generate number of nonzeros in this row - std::vector col_idx(num_cols); - std::iota(begin(col_idx), end(col_idx), size_type(0)); - const auto nnz_row = static_cast(nonzero_dist(engine)); - size_type nnz_in_row = - std::max(size_type(0), std::min(nnz_row, num_cols)); - std::shuffle(std::begin(col_idx), std::end(col_idx), engine); - - if (with_all_diagonals) { - if (nnz_in_row == 0) { - nnz_in_row = 1; - } - bool has_diagonal = false; - for (size_type icol = 0; icol < nnz_in_row; icol++) { - if (col_idx[icol] == row) { - has_diagonal = true; - } - } - if (!has_diagonal) { - col_idx[0] = row; - } - } - - std::for_each(std::begin(col_idx), std::begin(col_idx) + nnz_in_row, - [&](size_type col) { - in_data.nonzeros.emplace_back(row, col, 1.0); - }); - } - - std::vector> batch_mtx; - batch_mtx.reserve(batch_size); - - for (int batch = 0; batch < batch_size; batch++) { - matrix_data data = in_data; - for (size_type nnz = 0; nnz < data.nonzeros.size(); ++nnz) { - value_type val = - gko::detail::get_rand_value(value_dist, engine); - if (data.nonzeros[nnz].column == data.nonzeros[nnz].row && - val == zero()) { - val = 1.0; - } - data.nonzeros[nnz].value = val; - } - - data.ensure_row_major_order(); - batch_mtx.emplace_back(std::move(data)); + auto result = MatrixType::create( + exec, batch_dim<2>(num_batch_items, dim<2>(num_rows, num_cols)), + std::forward(args)...); + + // TODO: Need to preserve sparsity pattern across batch items for batched + // sparse matrix formats + for (size_type b = 0; b < num_batch_items; b++) { + auto rand_mat = + generate_random_matrix( + num_rows, num_cols, nonzero_dist, value_dist, engine, exec); + result->create_view_for_item(b)->copy_from(rand_mat.get()); } - // convert to the correct matrix type - auto result = MatrixType::create(exec, std::forward(args)...); - result->read(batch_mtx); return result; } diff --git a/include/ginkgo/core/base/batch_lin_op_helpers.hpp b/include/ginkgo/core/base/batch_lin_op_helpers.hpp deleted file mode 100644 index 5d1a2f8ed0d..00000000000 --- a/include/ginkgo/core/base/batch_lin_op_helpers.hpp +++ /dev/null @@ -1,109 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#ifndef GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ -#define GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ - - -#include -#include -#include -#include - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace gko { - - -/** - * A BatchLinOp implementing this interface can read its data from a matrix_data - * structure. - * - * @ingroup BatchLinOp - */ -template -class BatchReadableFromMatrixData { -public: - using value_type = ValueType; - using index_type = IndexType; - - virtual ~BatchReadableFromMatrixData() = default; - - /** - * Reads a batch matrix from a std::vector of matrix_data objects. - * - * @param data the std::vector of matrix_data objects - */ - virtual void read( - const std::vector>& data) = 0; -}; - - -/** - * A BatchLinOp implementing this interface can write its data to a std::vector - * of matrix_data objects. - * - * @ingroup BatchLinOp - */ -template -class BatchWritableToMatrixData { -public: - using value_type = ValueType; - using index_type = IndexType; - - virtual ~BatchWritableToMatrixData() = default; - - /** - * Writes a matrix to a matrix_data structure. - * - * @param data the matrix_data structure - */ - virtual void write( - std::vector>& data) const = 0; -}; - - -} // namespace gko - - -#endif // GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 77171569320..8003f5499f1 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -40,7 +40,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include -#include #include #include #include @@ -82,19 +81,13 @@ class MultiVector : public EnablePolymorphicObject>, public EnablePolymorphicAssignment>, public EnableCreateMethod>, - public ConvertibleTo>>, - public BatchReadableFromMatrixData, - public BatchReadableFromMatrixData, - public BatchWritableToMatrixData, - public BatchWritableToMatrixData { + public ConvertibleTo>> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class MultiVector>; friend class MultiVector>; public: - using BatchReadableFromMatrixData::read; - using BatchReadableFromMatrixData::read; using EnablePolymorphicAssignment::convert_to; using EnablePolymorphicAssignment::move_to; using ConvertibleTo>>::convert_to; @@ -103,8 +96,6 @@ class MultiVector using value_type = ValueType; using index_type = int32; using unbatch_type = matrix::Dense; - using mat_data = matrix_data; - using mat_data64 = matrix_data; using absolute_type = remove_complex>; using complex_type = to_complex>; @@ -122,14 +113,6 @@ class MultiVector void move_to(MultiVector>* result) override; - void read(const std::vector& data) override; - - void read(const std::vector& data) override; - - void write(std::vector& data) const override; - - void write(std::vector& data) const override; - /** * Creates a mutable view (of matrix::Dense type) of one item of the Batch * MultiVector object. Does not perform any deep copies, but only returns a @@ -148,19 +131,6 @@ class MultiVector std::unique_ptr create_const_view_for_item( size_type item_id) const; - /** - * Unbatches the batched multi-vector and creates a std::vector of Dense - * matrices - * - * @note This is an expensive operation as new memory needs to be allocated - * and the data from the batched multi-vector needs to copied to the - * individual matrices. This is mainly intended as a utility function - * for debugging and testing purposes. - * - * @return a std::vector containing the matrix::Dense objects. - */ - std::vector> unbatch() const; - /** * Returns the batch size. * @@ -435,49 +405,6 @@ class MultiVector GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); } - /** - * Creates a MultiVector from a vector of matrices - * - * @param exec Executor associated to the vector - * @param matrices The matrix::Dense objects that need to be batched. - * - * @note This is a utility function that can serve as a first step to port - * to batched data-structures and solvers. Even if the matrices are in - * device memory, this method can have significant overhead, as new - * allocations and deep copies are necessary and hence this constructor must - * not be used in performance sensitive applications - */ - MultiVector(std::shared_ptr exec, - const std::vector*>& matrices); - - /** - * Creates a MultiVector matrix by duplicating MultiVector object - * - * @param exec Executor associated to the vector - * @param num_duplications The number of times to duplicate - * @param input the vector to be duplicated. - * - * @note This is a utility function that can serve as a first step to port - * to batched data-structures and solvers. Even if the matrices are in - * device memory, this method can have significant overhead, as new - * allocations and deep copies are necessary and hence this constructor must - * not be used in performance sensitive applications. 
- */ - MultiVector(std::shared_ptr exec, - size_type num_duplications, - const MultiVector* input); - - /** - * Creates a MultiVector matrix by a duplicating a matrix::Dense object - * - * @param exec Executor associated to the vector - * @param num_duplications The number of times to duplicate - * @param input the matrix to be duplicated. - */ - MultiVector(std::shared_ptr exec, - size_type num_duplications, - const matrix::Dense* input); - /** * Creates a MultiVector with the same configuration as the * callers object. diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index eebb31772ea..179a8a01a46 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -40,7 +40,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include #include diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index 82429660b32..4f922c37703 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -48,7 +48,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_multi_vector_kernels.hpp" +#include "core/base/batch_utilities.hpp" #include "core/test/utils.hpp" +#include "core/test/utils/batch_helpers.hpp" template @@ -135,13 +137,13 @@ TYPED_TEST(MultiVector, ScalesData) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, 1.5}}, {{3.0, -1.0, 0.25}}}, this->exec); - auto ualpha = alpha->unbatch(); + auto ualpha = gko::batch::multivector::unbatch(alpha.get()); this->mtx_0->scale(alpha.get()); this->mtx_00->scale(ualpha[0].get()); this->mtx_01->scale(ualpha[1].get()); - auto res = this->mtx_0->unbatch(); + auto res = gko::batch::multivector::unbatch(this->mtx_0.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_01.get(), 0.); } @@ -152,13 +154,13 @@ TYPED_TEST(MultiVector, ScalesDataWithScalar) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize({{2.0}, {-2.0}}, this->exec); - auto ualpha = alpha->unbatch(); + auto ualpha = gko::batch::multivector::unbatch(alpha.get()); this->mtx_1->scale(alpha.get()); this->mtx_10->scale(ualpha[0].get()); this->mtx_11->scale(ualpha[1].get()); - auto res = this->mtx_1->unbatch(); + auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -170,13 +172,13 @@ TYPED_TEST(MultiVector, ScalesDataWithMultipleScalars) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, -1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto ualpha = alpha->unbatch(); + auto ualpha = gko::batch::multivector::unbatch(alpha.get()); this->mtx_1->scale(alpha.get()); this->mtx_10->scale(ualpha[0].get()); this->mtx_11->scale(ualpha[1].get()); - auto res = this->mtx_1->unbatch(); + auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -188,13 +190,13 @@ TYPED_TEST(MultiVector, AddsScaled) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto ualpha = alpha->unbatch(); + auto ualpha = 
gko::batch::multivector::unbatch(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - auto res = this->mtx_1->unbatch(); + auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -205,13 +207,13 @@ TYPED_TEST(MultiVector, AddsScaledWithScalar) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize({{2.0}, {-2.0}}, this->exec); - auto ualpha = alpha->unbatch(); + auto ualpha = gko::batch::multivector::unbatch(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - auto res = this->mtx_1->unbatch(); + auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -234,13 +236,13 @@ TYPED_TEST(MultiVector, ComputesDot) using T = typename TestFixture::value_type; auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - auto ures = result->unbatch(); + auto ures = gko::batch::multivector::unbatch(result.get()); this->mtx_0->compute_dot(this->mtx_1.get(), result.get()); this->mtx_00->compute_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_dot(this->mtx_11.get(), ures[1].get()); - auto res = result->unbatch(); + auto res = gko::batch::multivector::unbatch(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); } @@ -275,13 +277,13 @@ TYPED_TEST(MultiVector, ComputesConjDot) using T = typename TestFixture::value_type; auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - auto ures = result->unbatch(); + auto ures = gko::batch::multivector::unbatch(result.get()); this->mtx_0->compute_conj_dot(this->mtx_1.get(), result.get()); this->mtx_00->compute_conj_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_conj_dot(this->mtx_11.get(), ures[1].get()); - auto res = result->unbatch(); + auto res = gko::batch::multivector::unbatch(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); } @@ -357,8 +359,8 @@ TYPED_TEST(MultiVector, ConvertsToPrecision) this->mtx_1->convert_to(tmp.get()); tmp->convert_to(res.get()); - auto ures = res->unbatch(); - auto umtx = this->mtx_1->unbatch(); + auto ures = gko::batch::multivector::unbatch(res.get()); + auto umtx = gko::batch::multivector::unbatch(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(umtx[0].get(), ures[0].get(), residual); GKO_ASSERT_MTX_NEAR(umtx[1].get(), ures[1].get(), residual); } @@ -380,8 +382,8 @@ TYPED_TEST(MultiVector, MovesToPrecision) this->mtx_1->move_to(tmp.get()); tmp->move_to(res.get()); - auto ures = res->unbatch(); - auto umtx = this->mtx_1->unbatch(); + auto ures = gko::batch::multivector::unbatch(res.get()); + auto umtx = gko::batch::multivector::unbatch(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(umtx[0].get(), ures[0].get(), residual); GKO_ASSERT_MTX_NEAR(umtx[1].get(), ures[1].get(), residual); } diff --git a/test/base/batch_multi_vector_kernels.cpp b/test/base/batch_multi_vector_kernels.cpp index 2d0c79d0664..be625853656 100644 --- 
a/test/base/batch_multi_vector_kernels.cpp +++ b/test/base/batch_multi_vector_kernels.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_multi_vector_kernels.hpp" +#include "core/base/batch_utilities.hpp" #include "core/test/utils.hpp" #include "core/test/utils/assertions.hpp" #include "core/test/utils/batch_helpers.hpp" @@ -59,13 +60,14 @@ class MultiVector : public CommonTestFixture { MultiVector() : rand_engine(15) {} template - std::unique_ptr gen_mtx(const size_t num_batch_items, int num_rows, - int num_cols) + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) { - return gko::test::generate_uniform_batch_random_matrix( + return gko::test::generate_random_batch_matrix( num_batch_items, num_rows, num_cols, std::uniform_int_distribution<>(num_cols, num_cols), - std::normal_distribution<>(-1.0, 1.0), rand_engine, false, ref); + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } void set_up_vector_data(gko::size_type num_vecs, From 0df4d692992b2fd639230914ab9f8486fdef44f3 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 2 Aug 2023 22:56:49 +0200 Subject: [PATCH 149/583] Remove warnings from CI builds --- .github/workflows/intel.yml | 2 +- .gitlab-ci.yml | 6 ++++++ .gitlab/scripts.yml | 2 ++ .gitlab/variables.yml | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 0d8acd52a34..9fd85708737 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -35,7 +35,7 @@ jobs: spack find --loaded mkdir build cd build - cmake .. -DCMAKE_INSTALL_PREFIX=install_ginkgo -DCMAKE_CXX_COMPILER=dpcpp -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }} -DGINKGO_DPCPP_SINGLE_MODE=ON + cmake .. 
-DCMAKE_INSTALL_PREFIX=install_ginkgo -DGINKGO_COMPILER_FLAGS="-ffp-model=precise" -DCMAKE_CXX_COMPILER=dpcpp -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }} -DGINKGO_DPCPP_SINGLE_MODE=ON make -j8 ONEAPI_DEVICE_SELECTOR=level_zero:gpu ctest -j10 --output-on-failure diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4ad66eca652..d2cae1ddf5f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -613,6 +613,7 @@ build/dpcpp/2022-1/cpu/release/static: C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" BUILD_DPCPP: "ON" + GKO_COMPILER_FLAGS: "-ffp-model=precise" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "ON" SYCL_DEVICE_FILTER: "*:cpu" @@ -631,6 +632,7 @@ build/dpcpp/igpu/release/shared: C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" BUILD_DPCPP: "ON" + GKO_COMPILER_FLAGS: "-ffp-model=precise" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "ON" DPCPP_SINGLE_MODE: "ON" @@ -647,6 +649,7 @@ build/dpcpp/igpu/release/shared: # C_COMPILER: "gcc" # CXX_COMPILER: "dpcpp" # BUILD_DPCPP: "ON" +# GKO_COMPILER_FLAGS: "-ffp-model=precise" # BUILD_TYPE: "Debug" # BUILD_SHARED_LIBS: "ON" # DPCPP_SINGLE_MODE: "ON" @@ -663,6 +666,7 @@ build/dpcpp/dgpu/release/static: C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" BUILD_DPCPP: "ON" + GKO_COMPILER_FLAGS: "-ffp-model=precise" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OF" DPCPP_SINGLE_MODE: "ON" @@ -678,6 +682,7 @@ build/dpcpp/level_zero_dgpu/release/shared: C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" BUILD_DPCPP: "ON" + GKO_COMPILER_FLAGS: "-ffp-model=precise" BUILD_TYPE: "Release" DPCPP_SINGLE_MODE: "ON" ONEAPI_DEVICE_SELECTOR: "level_zero:gpu" @@ -695,6 +700,7 @@ warnings: BUILD_CUDA: "ON" BUILD_HIP: "ON" CXX_FLAGS: "-Werror=pedantic -pedantic-errors" + GKO_COMPILER_FLAGS: "-Wpedantic" allow_failure: yes # Ensure kernel modules do not depend on core diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index 7b1c30c27c0..4f699cb53fc 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -38,6 +38,7 @@ -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} ${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} ${CUDA_HOST_STR} + -DGINKGO_COMPILER_FLAGS=${GKO_COMPILER_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} -DGINKGO_BUILD_HIP=${BUILD_HIP} @@ -82,6 +83,7 @@ -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} ${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} ${CUDA_HOST_STR} + -DGINKGO_COMPILER_FLAGS=${GKO_COMPILER_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} -DGINKGO_BUILD_HIP=${BUILD_HIP} diff --git a/.gitlab/variables.yml b/.gitlab/variables.yml index 6ae62b8c899..183bdef9e4e 100644 --- a/.gitlab/variables.yml +++ b/.gitlab/variables.yml @@ -13,6 +13,7 @@ BUILD_HIP: "OFF" BUILD_HWLOC: "ON" BUILD_MPI: "OFF" + GKO_COMPILER_FLAGS: "" MPI_AS_ROOT: "OFF" FAST_TESTS: "OFF" NONDEFAULT_STREAM: "OFF" From 12a7e66f62b1d50aef24158a670cd70ad87eab6b Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Wed, 2 Aug 2023 21:45:29 +0000 Subject: [PATCH 150/583] Format files Co-authored-by: Pratik Nayak --- core/base/batch_utilities.hpp | 4 +--- include/ginkgo/core/base/batch_multi_vector.hpp | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/core/base/batch_utilities.hpp 
b/core/base/batch_utilities.hpp index f7f28a616d8..e5dc22faeda 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -34,14 +34,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_BASE_BATCH_UTILITIES_HPP_ -#include - - #include #include #include +#include #include #include #include diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 8003f5499f1..d91274526d3 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -196,8 +196,8 @@ class MultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + From bd28e2b040c60778a685a8ecf142e903a52c390a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 2 Aug 2023 23:49:00 +0200 Subject: [PATCH 151/583] Fix warning in exception --- include/ginkgo/core/base/exception_helpers.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index a9a93f15fe8..4a12865f374 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -733,7 +733,7 @@ inline T ensure_allocated_impl(T ptr, const std::string& file, int line, */ #define GKO_THROW_IF_INVALID(_condition, _message) \ { \ - if (!_condition) { \ + if (!(_condition)) { \ throw ::gko::InvalidStateError(__FILE__, __LINE__, __func__, \ _message); \ } \ From 24d58587eee7a26b01e3d6c263cec799024a430d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 7 Aug 2023 09:42:23 +0200 Subject: [PATCH 152/583] remove CUDA 9.2 support --- .gitlab-ci.yml | 13 ------ .gitlab/image.yml | 6 --- README.md | 6 +-- cmake/cuda.cmake | 9 ---- cmake/hip.cmake | 15 ------- .../base/device_matrix_data_kernels.hpp.inc | 44 +++++++------------ common/cuda_hip/matrix/csr_kernels.hpp.inc | 38 +++++++--------- common/cuda_hip/matrix/fbcsr_kernels.hpp.inc | 9 +--- common/cuda_hip/multigrid/pgm_kernels.hpp.inc | 15 ++----- common/unified/matrix/csr_kernels.cpp | 8 ++-- cuda/solver/common_trs_kernels.cuh | 2 +- hip/CMakeLists.txt | 2 +- .../identify_stream_usage.cpp | 4 -- 13 files changed, 44 insertions(+), 127 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d2cae1ddf5f..709f2b4f53a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -93,19 +93,6 @@ trigger_pipeline: # Build jobs # Job with example runs. 
-# cuda 9.2 and friends -build/cuda92/nompi/gcc/all/release/shared: - extends: - - .build_and_test_template - - .default_variables - - .quick_test_condition - - .use_gko-cuda92-mvapich2-gnu7-llvm50-intel2017 - variables: - BUILD_OMP: "ON" - BUILD_CUDA: "ON" - BUILD_HIP: "ON" - BUILD_TYPE: "Release" - # cuda 10.1 and friends # Build CUDA NVIDIA without omp # Make sure that our jobs run when HWLOC is diff --git a/.gitlab/image.yml b/.gitlab/image.yml index 50dfbe9d2f8..cad06674aee 100644 --- a/.gitlab/image.yml +++ b/.gitlab/image.yml @@ -24,12 +24,6 @@ - cpu - controller -.use_gko-cuda92-mvapich2-gnu7-llvm50-intel2017: - image: ginkgohub/cuda:92-mvapich2-gnu7-llvm50-intel2017 - tags: - - private_ci - - nvidia-gpu - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019: image: ginkgohub/cuda:101-openmpi-gnu8-llvm7-intel2019 tags: diff --git a/README.md b/README.md index be865e933f2..ba9082839bd 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ For Ginkgo core library: The Ginkgo CUDA module has the following __additional__ requirements: -* _CUDA 9.2+_ or _NVHPC Package 22.7+_ +* _CUDA 10.1+_ or _NVHPC Package 22.7+_ * Any host compiler restrictions your version of CUDA may impose also apply here. For the newest CUDA version, this information can be found in the [CUDA installation guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) @@ -58,7 +58,7 @@ The Ginkgo HIP module has the following __additional__ requirements: * _ROCm 4.5+_ * the HIP, hipBLAS, hipSPARSE, hip/rocRAND and rocThrust packages compiled with either: * _AMD_ backend (using the `clang` compiler) - * _9.2 <= CUDA < 11_ backend + * _10.1 <= CUDA < 11_ backend * if the hipFFT package is available, it is used to implement the FFT LinOps. The Ginkgo DPC++ module has the following __additional__ requirements: @@ -90,7 +90,7 @@ following: The Ginkgo CUDA module has the following __additional__ requirements: -* _CUDA 9.2+_ +* _CUDA 10.1+_ * _Microsoft Visual Studio_ * Any host compiler restrictions your version of CUDA may impose also apply here. For the newest CUDA version, this information can be found in the diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index c5ba334e983..88a1b4e777a 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -85,12 +85,3 @@ if(CMAKE_CUDA_HOST_COMPILER AND NOT CMAKE_CXX_COMPILER STREQUAL CMAKE_CUDA_HOST_ "The CUDA host compiler is ${CMAKE_CUDA_HOST_COMPILER}.") endif() -if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION - MATCHES "9.2" AND CMAKE_CUDA_HOST_COMPILER MATCHES ".*clang.*" ) - ginkgo_extract_clang_version(${CMAKE_CUDA_HOST_COMPILER} GINKGO_CUDA_HOST_CLANG_VERSION) - - if (GINKGO_CUDA_HOST_CLANG_VERSION MATCHES "5\.0.*") - message(FATAL_ERROR "There is a bug between nvcc 9.2 and clang 5.0 which create a compiling issue." 
- "Consider using a different CUDA host compiler or CUDA version.") - endif() -endif() diff --git a/cmake/hip.cmake b/cmake/hip.cmake index 5b7a268c7b6..bb141450b25 100644 --- a/cmake/hip.cmake +++ b/cmake/hip.cmake @@ -22,11 +22,6 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.21) set(CMAKE_HIP_ARCHITECTURES OFF) endif() -if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}" - AND GINKGO_BUILD_CUDA AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9.2) - message(FATAL_ERROR "Ginkgo HIP backend requires CUDA >= 9.2.") -endif() - if(NOT DEFINED ROCM_PATH) if(DEFINED ENV{ROCM_PATH}) set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCM has been installed") @@ -197,16 +192,6 @@ if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") # Remove false positive CUDA warnings when calling one() and zero() list(APPEND GINKGO_HIP_NVCC_ADDITIONAL_FLAGS --expt-relaxed-constexpr --expt-extended-lambda) - if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}" - AND CMAKE_CUDA_COMPILER_VERSION MATCHES "9.2" - AND CMAKE_CUDA_HOST_COMPILER MATCHES ".*clang.*" ) - ginkgo_extract_clang_version(${CMAKE_CUDA_HOST_COMPILER} GINKGO_CUDA_HOST_CLANG_VERSION) - - if (GINKGO_CUDA_HOST_CLANG_VERSION MATCHES "5\.0.*") - message(FATAL_ERROR "There is a bug between nvcc 9.2 and clang 5.0 which create a compiling issue." - "Consider using a different CUDA host compiler or CUDA version.") - endif() - endif() # select GPU architecture include(cmake/Modules/CudaArchitectureSelector.cmake) cas_variable_cuda_architectures(GINKGO_HIP_NVCC_ARCH diff --git a/common/cuda_hip/base/device_matrix_data_kernels.hpp.inc b/common/cuda_hip/base/device_matrix_data_kernels.hpp.inc index 5930902ed37..faf0ad15146 100644 --- a/common/cuda_hip/base/device_matrix_data_kernels.hpp.inc +++ b/common/cuda_hip/base/device_matrix_data_kernels.hpp.inc @@ -35,19 +35,13 @@ void remove_zeros(std::shared_ptr exec, array& values, array& row_idxs, array& col_idxs) { - // workaround for CUDA 9.2 Thrust: Their complex<> implementation is broken - // due to overly generic assignment operator and constructor leading to - // ambiguities. 
So we need to use our own fake_complex type - using device_value_type = device_member_type; - auto value_ptr = - reinterpret_cast(values.get_const_data()); + using device_value_type = device_type; + auto value_ptr = as_device_type(values.get_const_data()); auto size = values.get_num_elems(); // count nonzeros - auto nnz = - thrust::count_if(thrust_policy(exec), value_ptr, value_ptr + size, - [] __device__(device_value_type value) { - return is_nonzero(fake_complex_unpack(value)); - }); + auto nnz = thrust::count_if( + thrust_policy(exec), value_ptr, value_ptr + size, + [] __device__(device_value_type value) { return is_nonzero(value); }); if (nnz < size) { using tuple_type = thrust::tuple; @@ -58,14 +52,13 @@ void remove_zeros(std::shared_ptr exec, // copy nonzeros auto it = thrust::make_zip_iterator(thrust::make_tuple( row_idxs.get_const_data(), col_idxs.get_const_data(), value_ptr)); - auto out_it = thrust::make_zip_iterator(thrust::make_tuple( - new_row_idxs.get_data(), new_col_idxs.get_data(), - reinterpret_cast(new_values.get_data()))); - thrust::copy_if( - thrust_policy(exec), it, it + size, out_it, - [] __device__(tuple_type entry) { - return is_nonzero(fake_complex_unpack(thrust::get<2>(entry))); - }); + auto out_it = thrust::make_zip_iterator( + thrust::make_tuple(new_row_idxs.get_data(), new_col_idxs.get_data(), + as_device_type(new_values.get_data()))); + thrust::copy_if(thrust_policy(exec), it, it + size, out_it, + [] __device__(tuple_type entry) { + return is_nonzero(thrust::get<2>(entry)); + }); // swap out storage values = std::move(new_values); row_idxs = std::move(new_row_idxs); @@ -82,7 +75,6 @@ void sum_duplicates(std::shared_ptr exec, size_type, array& values, array& row_idxs, array& col_idxs) { - using device_value_type = device_member_type; const auto size = values.get_num_elems(); const auto rows = row_idxs.get_const_data(); const auto cols = col_idxs.get_const_data(); @@ -104,12 +96,10 @@ void sum_duplicates(std::shared_ptr exec, size_type, // reduce duplicates auto in_locs = thrust::make_zip_iterator(thrust::make_tuple(rows, cols)); - auto in_vals = - reinterpret_cast(values.get_const_data()); + auto in_vals = as_device_type(values.get_const_data()); auto out_locs = thrust::make_zip_iterator(thrust::make_tuple( new_row_idxs.get_data(), new_col_idxs.get_data())); - auto out_vals = - reinterpret_cast(new_values.get_data()); + auto out_vals = as_device_type(new_values.get_data()); thrust::reduce_by_key(thrust_policy(exec), in_locs, in_locs + size, in_vals, out_locs, out_vals); // swap out storage @@ -127,13 +117,9 @@ template void sort_row_major(std::shared_ptr exec, device_matrix_data& data) { - // workaround for CUDA 9.2 Thrust: Their complex<> implementation is broken - // due to overly generic assignment operator and constructor leading to - // ambiguities. 
So we need to use our own fake_complex type - using device_value_type = device_member_type; auto it = thrust::make_zip_iterator( thrust::make_tuple(data.get_row_idxs(), data.get_col_idxs())); - auto vals = reinterpret_cast(data.get_values()); + auto vals = as_device_type(data.get_values()); thrust::sort_by_key(thrust_policy(exec), it, it + data.get_num_elems(), vals); } diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index c370075c8a8..3f02337747e 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -872,11 +872,7 @@ void convert_to_fbcsr(std::shared_ptr exec, } auto in_rows = in_row_idxs.get_data(); auto in_cols = in_col_idxs.get_data(); - // workaround for CUDA 9.2 Thrust: Their complex<> implementation is broken - // due to overly generic assignment operator and constructor leading to - // ambiguities. So we need to use our own fake_complex type - auto in_vals = - reinterpret_cast*>(in_values.get_data()); + auto in_vals = as_device_type(in_values.get_data()); auto in_loc_it = thrust::make_zip_iterator(thrust::make_tuple(in_rows, in_cols)); thrust::sort_by_key(thrust_policy(exec), in_loc_it, in_loc_it + nnz, @@ -924,17 +920,17 @@ void convert_to_fbcsr(std::shared_ptr exec, // fill in values components::fill_array(exec, block_value_array.get_data(), num_blocks * bs * bs, zero()); - thrust::for_each_n( - thrust_policy(exec), iota, num_blocks, - [block_ptrs, nnz, num_blocks, bs, in_rows, in_cols, in_vals, - values] __device__(size_type i) { - const auto block_begin = block_ptrs[i]; - const auto block_end = i < num_blocks - 1 ? block_ptrs[i + 1] : nnz; - for (auto nz = block_begin; nz < block_end; nz++) { - values[i * bs * bs + (in_cols[nz] % bs) * bs + - (in_rows[nz] % bs)] = fake_complex_unpack(in_vals[nz]); - } - }); + thrust::for_each_n(thrust_policy(exec), iota, num_blocks, + [block_ptrs, nnz, num_blocks, bs, in_rows, in_cols, + in_vals, values] __device__(size_type i) { + const auto block_begin = block_ptrs[i]; + const auto block_end = + i < num_blocks - 1 ? 
block_ptrs[i + 1] : nnz; + for (auto nz = block_begin; nz < block_end; nz++) { + values[i * bs * bs + (in_cols[nz] % bs) * bs + + (in_rows[nz] % bs)] = in_vals[nz]; + } + }); } @@ -1130,13 +1126,10 @@ void fallback_transpose(std::shared_ptr exec, const auto nnz = output->get_num_stored_elements(); const auto in_row_ptrs = input->get_const_row_ptrs(); const auto in_col_idxs = input->get_const_col_idxs(); - // workaround for CUDA 9.2 Thrust unconstrained constructor issues - const auto in_vals = reinterpret_cast*>( - input->get_const_values()); + const auto in_vals = as_device_type(input->get_const_values()); const auto out_row_ptrs = output->get_row_ptrs(); const auto out_col_idxs = output->get_col_idxs(); - const auto out_vals = - reinterpret_cast*>(output->get_values()); + const auto out_vals = as_device_type(output->get_values()); array out_row_idxs{exec, nnz}; components::convert_ptrs_to_idxs(exec, in_row_ptrs, in_num_rows, out_col_idxs); @@ -1156,8 +1149,7 @@ void fallback_sort(std::shared_ptr exec, { const auto row_ptrs = to_sort->get_const_row_ptrs(); const auto col_idxs = to_sort->get_col_idxs(); - const auto vals = - reinterpret_cast*>(to_sort->get_values()); + const auto vals = as_device_type(to_sort->get_values()); const auto nnz = to_sort->get_num_stored_elements(); const auto num_rows = to_sort->get_size()[0]; array row_idx_array(exec, nnz); diff --git a/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc b/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc index d71d593b0a2..607ec5046ea 100644 --- a/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc @@ -172,11 +172,7 @@ void fill_in_matrix_data(std::shared_ptr exec, } auto in_rows = data.get_row_idxs(); auto in_cols = data.get_col_idxs(); - // workaround for CUDA 9.2 Thrust: Their complex<> implementation is broken - // due to overly generic assignment operator and constructor leading to - // ambiguities. So we need to use our own fake_complex type - auto in_vals = - reinterpret_cast*>(data.get_values()); + auto in_vals = as_device_type(data.get_values()); auto in_loc_it = thrust::make_zip_iterator(thrust::make_tuple(in_rows, in_cols)); thrust::sort_by_key(thrust_policy(exec), in_loc_it, in_loc_it + nnz, @@ -232,8 +228,7 @@ void fill_in_matrix_data(std::shared_ptr exec, const auto block_end = i < num_blocks - 1 ? block_ptrs[i + 1] : nnz; for (auto nz = block_begin; nz < block_end; nz++) { block_values[i * bs * bs + (in_cols[nz] % bs) * bs + - (in_rows[nz] % bs)] = - fake_complex_unpack(in_vals[nz]); + (in_rows[nz] % bs)] = in_vals[nz]; } }); } diff --git a/common/cuda_hip/multigrid/pgm_kernels.hpp.inc b/common/cuda_hip/multigrid/pgm_kernels.hpp.inc index d8b6c4786b0..b08e86efaaa 100644 --- a/common/cuda_hip/multigrid/pgm_kernels.hpp.inc +++ b/common/cuda_hip/multigrid/pgm_kernels.hpp.inc @@ -45,11 +45,7 @@ template void sort_row_major(std::shared_ptr exec, size_type nnz, IndexType* row_idxs, IndexType* col_idxs, ValueType* vals) { - // workaround for CUDA 9.2 Thrust: Their complex<> implementation is broken - // due to overly generic assignment operator and constructor leading to - // ambiguities. 
So we need to use our own fake_complex type - using device_value_type = device_member_type; - auto vals_it = reinterpret_cast(vals); + auto vals_it = as_device_type(vals); auto it = thrust::make_zip_iterator(thrust::make_tuple(row_idxs, col_idxs)); // Because reduce_by_key is not deterministic, so we do not need // stable_sort_by_key @@ -67,16 +63,11 @@ void compute_coarse_coo(std::shared_ptr exec, const IndexType* col_idxs, const ValueType* vals, matrix::Coo* coarse_coo) { - // workaround for CUDA 9.2 Thrust: Their complex<> implementation is broken - // due to overly generic assignment operator and constructor leading to - // ambiguities. So we need to use our own fake_complex type - using device_value_type = device_member_type; - auto vals_it = reinterpret_cast(vals); + auto vals_it = as_device_type(vals); auto key_it = thrust::make_zip_iterator(thrust::make_tuple(row_idxs, col_idxs)); - auto coarse_vals_it = - reinterpret_cast(coarse_coo->get_values()); + auto coarse_vals_it = as_device_type(coarse_coo->get_values()); auto coarse_key_it = thrust::make_zip_iterator(thrust::make_tuple( coarse_coo->get_row_idxs(), coarse_coo->get_col_idxs())); diff --git a/common/unified/matrix/csr_kernels.cpp b/common/unified/matrix/csr_kernels.cpp index f4e034998bd..1704fdd1f9c 100644 --- a/common/unified/matrix/csr_kernels.cpp +++ b/common/unified/matrix/csr_kernels.cpp @@ -154,8 +154,8 @@ void convert_to_sellp(std::shared_ptr exec, for (auto i = row_begin; i < row_begin + slice_length; i++) { cols[out_idx] = i < row_end ? in_cols[i] : invalid_index(); - values[out_idx] = i < row_end ? unpack_member(in_values[i]) - : zero(values[out_idx]); + values[out_idx] = + i < row_end ? in_values[i] : zero(values[out_idx]); out_idx += slice_size; } }, @@ -185,8 +185,8 @@ void convert_to_ell(std::shared_ptr exec, for (auto i = row_begin; i < row_begin + ell_length; i++) { cols[out_idx] = i < row_end ? in_cols[i] : invalid_index(); - values[out_idx] = i < row_end ? unpack_member(in_values[i]) - : zero(values[out_idx]); + values[out_idx] = + i < row_end ? 
in_values[i] : zero(values[out_idx]); out_idx += ell_stride; } }, diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh index bfdb4a5f854..f42b11f510d 100644 --- a/cuda/solver/common_trs_kernels.cuh +++ b/cuda/solver/common_trs_kernels.cuh @@ -198,7 +198,7 @@ struct CudaSolveStruct : gko::solver::SolveStruct { }; -#elif (defined(CUDA_VERSION) && (CUDA_VERSION >= 9020)) +#else template struct CudaSolveStruct : gko::solver::SolveStruct { diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 1573169527d..5ec1718ca4d 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -142,7 +142,7 @@ if(GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") endif() target_link_libraries(ginkgo_hip PUBLIC ${HIP_LIBAMDHIP64_LIBRARIES}) elseif(GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") - find_package(CUDA 9.2 REQUIRED) + find_package(CUDA 10.1 REQUIRED) target_link_libraries(ginkgo_hip PUBLIC ${CUDA_LIBRARIES}) endif() diff --git a/third_party/identify_stream_usage/identify_stream_usage.cpp b/third_party/identify_stream_usage/identify_stream_usage.cpp index a88de4ee427..5cdd4d30b09 100644 --- a/third_party/identify_stream_usage/identify_stream_usage.cpp +++ b/third_party/identify_stream_usage/identify_stream_usage.cpp @@ -124,14 +124,10 @@ DEFINE_OVERLOAD(cudaLaunchCooperativeKernel, size_t sharedMem, cudaStream_t stream), ARG(func, gridDim, blockDim, args, sharedMem, stream)); -#if CUDA_VERSION >= 10000 - DEFINE_OVERLOAD(cudaLaunchHostFunc, ARG(cudaStream_t stream, cudaHostFn_t fn, void* userData), ARG(stream, fn, userData)); -#endif - // Memory transfer APIS: // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY DEFINE_OVERLOAD(cudaMemPrefetchAsync, From f63484b51363cb3da9b530b45453efe9ab4ae9ae Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 7 May 2023 12:55:12 +0200 Subject: [PATCH 153/583] modernize CUDA setup --- CMakeLists.txt | 5 +- benchmark/CMakeLists.txt | 8 +-- cmake/DownloadNonCMakeCMakeLists.txt.in | 2 +- cmake/GinkgoConfig.cmake.in | 6 +- cmake/Modules/FindNVTX.cmake | 4 +- cmake/cuda.cmake | 62 +------------------ cuda/CMakeLists.txt | 7 +-- cuda/get_info.cmake | 5 -- cuda/test/solver/CMakeLists.txt | 4 +- doc/CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- examples/cb-gmres/CMakeLists.txt | 2 +- examples/custom-logger/CMakeLists.txt | 2 +- examples/custom-matrix-format/CMakeLists.txt | 2 +- .../custom-stopping-criterion/CMakeLists.txt | 2 +- .../external-lib-interfacing/CMakeLists.txt | 2 +- examples/ginkgo-overhead/CMakeLists.txt | 2 +- examples/ginkgo-ranges/CMakeLists.txt | 2 +- examples/heat-equation/CMakeLists.txt | 2 +- .../ilu-preconditioned-solver/CMakeLists.txt | 2 +- examples/inverse-iteration/CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- examples/iterative-refinement/CMakeLists.txt | 2 +- examples/kokkos_assembly/CMakeLists.txt | 2 +- examples/minimal-cuda-solver/CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- .../mixed-multigrid-solver/CMakeLists.txt | 2 +- examples/mixed-precision-ir/CMakeLists.txt | 2 +- examples/mixed-spmv/CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- .../nine-pt-stencil-solver/CMakeLists.txt | 2 +- examples/papi-logging/CMakeLists.txt | 2 +- examples/par-ilu-convergence/CMakeLists.txt | 2 +- examples/performance-debugging/CMakeLists.txt | 2 +- examples/poisson-solver/CMakeLists.txt | 2 +- examples/preconditioned-solver/CMakeLists.txt | 2 +- examples/preconditioner-export/CMakeLists.txt | 2 +- 
.../schroedinger-splitting/CMakeLists.txt | 2 +- examples/simple-solver-logging/CMakeLists.txt | 2 +- examples/simple-solver/CMakeLists.txt | 2 +- .../three-pt-stencil-solver/CMakeLists.txt | 2 +- test/test_exportbuild/CMakeLists.txt | 2 +- test/test_install/CMakeLists.txt | 2 +- test/test_pkgconfig/CMakeLists.txt | 2 +- test/test_subdir/CMakeLists.txt | 2 +- 46 files changed, 52 insertions(+), 125 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d70ac404ce..89c2b65d74b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,4 @@ -cmake_minimum_required(VERSION 3.13) - -# Use *_ROOT environment variables for find_package calls -cmake_policy(SET CMP0074 NEW) +cmake_minimum_required(VERSION 3.16) # Let CAS handle the CUDA architecture flags (for now) # Windows still gives CMP0104 warning if putting it in cuda. diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 434474fd336..f29620fac41 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -20,8 +20,8 @@ function(ginkgo_benchmark_cusparse_linops type def) endif() # make the dependency public to catch issues target_compile_definitions(cusparse_linops_${type} PUBLIC ${def}) - target_link_libraries(cusparse_linops_${type} Ginkgo::ginkgo ${CUDA_RUNTIME_LIBS} ${CUBLAS} ${CUSPARSE}) - target_include_directories(cusparse_linops_${type} SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS}) + target_link_libraries(cusparse_linops_${type} Ginkgo::ginkgo CUDA::cudart CUDA::cublas CUDA::cusparse) + target_include_directories(cusparse_linops_${type} SYSTEM PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) target_compile_definitions(cusparse_linops_${type} PRIVATE ALLOWMP=1) endfunction() @@ -122,8 +122,8 @@ if (GINKGO_BUILD_CUDA) ginkgo_benchmark_cusparse_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) ginkgo_benchmark_cusparse_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) add_library(cuda_timer utils/cuda_timer.cpp) - target_link_libraries(cuda_timer ginkgo ${CUDA_RUNTIME_LIBS}) - target_include_directories(cuda_timer SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS}) + target_link_libraries(cuda_timer ginkgo CUDA::cudart) + target_include_directories(cuda_timer SYSTEM PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) endif() if (GINKGO_BUILD_HIP) ginkgo_benchmark_hipsparse_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION) diff --git a/cmake/DownloadNonCMakeCMakeLists.txt.in b/cmake/DownloadNonCMakeCMakeLists.txt.in index c2d848e8d49..bae2281e63b 100644 --- a/cmake/DownloadNonCMakeCMakeLists.txt.in +++ b/cmake/DownloadNonCMakeCMakeLists.txt.in @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(${package_name}) include(ExternalProject) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index fe2ac05d7e5..a1e209a0c79 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -122,11 +122,7 @@ set(GINKGO_INTERFACE_CXX_FLAGS "@GINKGO_INTERFACE_CXX_FLAGS@") set(GINKGO_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@") set(GINKGO_CUDA_COMPILER_VERSION @CMAKE_CUDA_COMPILER_VERSION@) set(GINKGO_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@") - -set(GINKGO_CUBLAS_LIBRARIES "@CUBLAS@") -set(GINKGO_CUSPARSE_LIBRARIES "@CUSPARSE@") -set(GINKGO_CUDA_LIBRARIES "@CUDA_RUNTIME_LIBS@") -set(GINKGO_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@") +set(GINKGO_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CUDAToolkit_INCLUDE_DIRS@") set(GINKGO_CUDA_FLAGS "@CMAKE_CUDA_FLAGS_MODIFY@") set(GINKGO_CUDA_FLAGS_DEBUG "@CMAKE_CUDA_FLAGS_DEBUG_MODIFY@") diff --git 
a/cmake/Modules/FindNVTX.cmake b/cmake/Modules/FindNVTX.cmake index 7078c9dcb36..879c66f2d59 100644 --- a/cmake/Modules/FindNVTX.cmake +++ b/cmake/Modules/FindNVTX.cmake @@ -27,8 +27,8 @@ # ``NVTX_FOUND`` # If false, do not try to use the NVTX library. -find_path(NVTX3_INCLUDE_DIR NAMES nvToolsExt.h HINTS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/nvtx3) -find_path(NVTX_INCLUDE_DIR NAMES nvToolsExt.h HINTS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) +find_path(NVTX3_INCLUDE_DIR NAMES nvToolsExt.h HINTS ${CUDAToolkit_INCLUDE_DIRS}/nvtx3) +find_path(NVTX_INCLUDE_DIR NAMES nvToolsExt.h HINTS ${CUDAToolkit_INCLUDE_DIRS}) mark_as_advanced(NVTX3_INCLUDE_DIR) mark_as_advanced(NVTX_INCLUDE_DIR) include(FindPackageHandleStandardArgs) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 88a1b4e777a..9d0b435be9f 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -1,76 +1,16 @@ enable_language(CUDA) -if(MSVC) - # MSVC can not find CUDA automatically - # Use CUDA_COMPILER PATH to define the CUDA TOOLKIT ROOT DIR - string(REPLACE "/bin/nvcc.exe" "" CMAKE_CUDA_ROOT_DIR ${CMAKE_CUDA_COMPILER}) - if("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" STREQUAL "") - set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "${CMAKE_CUDA_ROOT_DIR}/include") - endif() - if("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" STREQUAL "") - set(CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES "${CMAKE_CUDA_ROOT_DIR}/lib/x64") - endif() -endif() +find_package(CUDAToolkit REQUIRED) include(cmake/Modules/CudaArchitectureSelector.cmake) -set(CUDA_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - # Detect the CUDA architecture flags and propagate to all the project cas_variable_cuda_architectures(GINKGO_CUDA_ARCH_FLAGS ARCHITECTURES ${GINKGO_CUDA_ARCHITECTURES} UNSUPPORTED "20" "21") -if (CMAKE_CXX_COMPILER_ID MATCHES "PGI|NVHPC") - find_package(NVHPC REQUIRED - HINTS - $ENV{NVIDIA_PATH} - ${CMAKE_CUDA_COMPILER}/../../.. - ) - - set(CUDA_RUNTIME_LIBS_DYNAMIC ${NVHPC_CUDART_LIBRARY}) - set(CUDA_RUNTIME_LIBS_STATIC ${NVHPC_CUDART_LIBRARY_STATIC}) - set(CUBLAS ${NVHPC_CUBLAS_LIBRARY}) - set(CUSPARSE ${NVHPC_CUSPARSE_LIBRARY}) - set(CURAND ${NVHPC_CURAND_LIBRARY}) - set(CUFFT ${NVHPC_CUFFT_LIBRARY}) -else() - find_library(CUDA_RUNTIME_LIBS_DYNAMIC cudart - HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) - find_library(CUDA_RUNTIME_LIBS_STATIC cudart_static - HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) - - # CUDA 10.1/10.2 put cublas, cublasLt, cudnn in /usr/lib/-linux-gnu/, but - # others (<= 10.0 or >= 11) put them in cuda own directory - # If the environment installs several cuda including 10.1/10.2, cmake will find - # the 10.1/10.2 .so files when searching others cuda in the default path. - # CMake already puts /usr/lib/-linux-gnu/ after cuda own directory in the - # `CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES`, so we always put NO_DEFAULT_PATH here. - find_library(CUBLAS cublas - HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} NO_DEFAULT_PATH) - find_library(CUSPARSE cusparse - HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) - find_library(CURAND curand - HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) - find_library(CUFFT cufft - HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) -endif() - find_package(NVTX REQUIRED) -# MSVC nvcc uses static cudartlibrary by default, and other platforms use shared cudartlibrary. -# add `-cudart shared` or `-cudart=shared` according system into CMAKE_CUDA_FLAGS -# to force nvcc to use dynamic cudart library in MSVC. 
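
A note on the cmake/cuda.cmake hunk above: this commit drops the hand-rolled find_library() lookups and cached path variables in favor of CMake's FindCUDAToolkit module, whose imported targets carry their own include directories and link options. A rough sketch of the resulting downstream usage pattern follows; the "demo" target and demo.cpp source are placeholder names for illustration only, not part of this patch:

    cmake_minimum_required(VERSION 3.17)  # FindCUDAToolkit ships with CMake 3.17+
    project(demo LANGUAGES CXX)

    find_package(CUDAToolkit REQUIRED)

    add_executable(demo demo.cpp)
    # The imported targets carry their include directories and link flags,
    # so no CUDA_INCLUDE_DIRS / CUDA_RUNTIME_LIBS bookkeeping is needed.
    target_link_libraries(demo PRIVATE CUDA::cudart CUDA::cublas CUDA::cusparse)

This is the same pattern the benchmark/CMakeLists.txt and cuda/CMakeLists.txt hunks in this commit switch to, replacing the CUBLAS/CUSPARSE/CURAND/CUFFT library path variables with the corresponding CUDA:: targets.
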
-if(MSVC) - if("${CMAKE_CUDA_FLAGS}" MATCHES "-cudart(=| )shared") - set(CUDA_RUNTIME_LIBS "${CUDA_RUNTIME_LIBS_DYNAMIC}" CACHE STRING "Path to a library" FORCE) - else() - set(CUDA_RUNTIME_LIBS "${CUDA_RUNTIME_LIBS_STATIC}" CACHE STRING "Path to a library" FORCE) - endif() -else() - set(CUDA_RUNTIME_LIBS "${CUDA_RUNTIME_LIBS_DYNAMIC}" CACHE STRING "Path to a library" FORCE) -endif() - if (NOT CMAKE_CUDA_HOST_COMPILER AND NOT GINKGO_CUDA_DEFAULT_HOST_COMPILER) set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "" FORCE) elseif(GINKGO_CUDA_DEFAULT_HOST_COMPILER) diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index dccc9e91401..7cf9053e2cf 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -114,13 +114,12 @@ target_compile_options(ginkgo_cuda PRIVATE $<$:${GINKGO_C target_compile_options(ginkgo_cuda PRIVATE $<$:${GINKGO_COMPILER_FLAGS}>) ginkgo_compile_features(ginkgo_cuda) target_compile_definitions(ginkgo_cuda PRIVATE GKO_COMPILING_CUDA) -target_include_directories(ginkgo_cuda - SYSTEM PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) +target_include_directories(ginkgo_cuda SYSTEM PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) # include path for generated headers like jacobi_common.hpp target_include_directories(ginkgo_cuda PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/..) -target_link_libraries(ginkgo_cuda PRIVATE ${CUDA_RUNTIME_LIBS} ${CUBLAS} ${CUSPARSE} ${CURAND} ${CUFFT} nvtx::nvtx) +target_link_libraries(ginkgo_cuda PRIVATE CUDA::cudart CUDA::cublas CUDA::cusparse CUDA::curand CUDA::cufft nvtx::nvtx) # NVTX3 is header-only and requires dlopen/dlclose in static builds target_link_libraries(ginkgo_cuda PUBLIC ginkgo_device ${CMAKE_DL_LIBS}) target_compile_options(ginkgo_cuda @@ -133,7 +132,7 @@ list(GET CUDA_RUNTIME_LIBS 0 CUDA_FIRST_LIB) get_filename_component(GKO_CUDA_LIBDIR "${CUDA_FIRST_LIB}" DIRECTORY) ginkgo_default_includes(ginkgo_cuda) -ginkgo_install_library(ginkgo_cuda "${GKO_CUDA_LIBDIR}") +ginkgo_install_library(ginkgo_cuda "${CUDAToolkit_LIBRARY_DIR}") if (GINKGO_CHECK_CIRCULAR_DEPS) ginkgo_check_headers(ginkgo_cuda GKO_COMPILING_CUDA) diff --git a/cuda/get_info.cmake b/cuda/get_info.cmake index 3d91ea9f23a..7955c3f636b 100644 --- a/cuda/get_info.cmake +++ b/cuda/get_info.cmake @@ -8,9 +8,4 @@ ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_COMPILER") ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_COMPILER_VERSION") ginkgo_print_flags(${detailed_log} "CMAKE_CUDA_FLAGS") ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_HOST_COMPILER") -ginkgo_print_variable(${detailed_log} "CUDA_INCLUDE_DIRS") -ginkgo_print_module_footer(${detailed_log} "CUDA Libraries:") -ginkgo_print_variable(${detailed_log} "CUBLAS") -ginkgo_print_variable(${detailed_log} "CUDA_RUNTIME_LIBS") -ginkgo_print_variable(${detailed_log} "CUSPARSE") ginkgo_print_module_footer(${detailed_log} "") diff --git a/cuda/test/solver/CMakeLists.txt b/cuda/test/solver/CMakeLists.txt index 0220d94c8d9..65187e68e1b 100644 --- a/cuda/test/solver/CMakeLists.txt +++ b/cuda/test/solver/CMakeLists.txt @@ -1,2 +1,2 @@ -ginkgo_create_test(lower_trs_kernels ADDITIONAL_INCLUDES ${CUDA_INCLUDE_DIRS}) -ginkgo_create_test(upper_trs_kernels ADDITIONAL_INCLUDES ${CUDA_INCLUDE_DIRS}) +ginkgo_create_test(lower_trs_kernels ADDITIONAL_INCLUDES ${CUDAToolkit_INCLUDE_DIRS}) +ginkgo_create_test(upper_trs_kernels ADDITIONAL_INCLUDES ${CUDAToolkit_INCLUDE_DIRS}) diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 8b975bb6544..8965b42add4 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -1,4 +1,4 @@ 
-cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) include(helpers.cmake) find_package(Doxygen REQUIRED) find_package(Perl REQUIRED) diff --git a/examples/adaptiveprecision-blockjacobi/CMakeLists.txt b/examples/adaptiveprecision-blockjacobi/CMakeLists.txt index 744df84a74b..324400e9cb4 100644 --- a/examples/adaptiveprecision-blockjacobi/CMakeLists.txt +++ b/examples/adaptiveprecision-blockjacobi/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(adaptiveprecision-blockjacobi) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/cb-gmres/CMakeLists.txt b/examples/cb-gmres/CMakeLists.txt index d616b16c882..826100b8bd2 100644 --- a/examples/cb-gmres/CMakeLists.txt +++ b/examples/cb-gmres/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(cb-gmres) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/custom-logger/CMakeLists.txt b/examples/custom-logger/CMakeLists.txt index f986dd52e76..8278d3e72ba 100644 --- a/examples/custom-logger/CMakeLists.txt +++ b/examples/custom-logger/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(custom-logger) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/custom-matrix-format/CMakeLists.txt b/examples/custom-matrix-format/CMakeLists.txt index 47eeda0143c..26034b7dce2 100644 --- a/examples/custom-matrix-format/CMakeLists.txt +++ b/examples/custom-matrix-format/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(custom-matrix-format CXX CUDA) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/custom-stopping-criterion/CMakeLists.txt b/examples/custom-stopping-criterion/CMakeLists.txt index 811baa59a9c..b429fba7c59 100644 --- a/examples/custom-stopping-criterion/CMakeLists.txt +++ b/examples/custom-stopping-criterion/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(custom-stopping-criterion) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/external-lib-interfacing/CMakeLists.txt b/examples/external-lib-interfacing/CMakeLists.txt index 4501ace4088..56d7b92ea0f 100644 --- a/examples/external-lib-interfacing/CMakeLists.txt +++ b/examples/external-lib-interfacing/CMakeLists.txt @@ -1,7 +1,7 @@ if(GINKGO_BUILD_EXTLIB_EXAMPLE) # This is just an example of the CMakeLists.txt file that can be used after the # correct version of deal.ii has been installed. 
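
The example CMakeLists.txt hunks in this commit only raise the minimum CMake version to 3.16; the surrounding logic of each example is untouched. For orientation, a minimal stand-alone consumer of an installed Ginkgo looks roughly like the sketch below, assuming the GINKGO_BUILD_EXAMPLES guard used throughout the examples; "my-example" and its source file are placeholder names:

    cmake_minimum_required(VERSION 3.16)
    project(my-example CXX)

    # When not built as part of the Ginkgo source tree, locate an installed Ginkgo.
    if(NOT GINKGO_BUILD_EXAMPLES)
        find_package(Ginkgo REQUIRED)
    endif()

    add_executable(my-example my-example.cpp)
    target_link_libraries(my-example Ginkgo::ginkgo)
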
- cmake_minimum_required(VERSION 3.9) + cmake_minimum_required(VERSION 3.16) project(DEAL_II_EXAMPLE LANGUAGES CXX) find_package(MPI 3.1 COMPONENTS CXX REQUIRED) diff --git a/examples/ginkgo-overhead/CMakeLists.txt b/examples/ginkgo-overhead/CMakeLists.txt index fcd7a81c230..350b58312fc 100644 --- a/examples/ginkgo-overhead/CMakeLists.txt +++ b/examples/ginkgo-overhead/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(ginkgo-overhead) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/ginkgo-ranges/CMakeLists.txt b/examples/ginkgo-ranges/CMakeLists.txt index 6e30c4f9af4..734a4567376 100644 --- a/examples/ginkgo-ranges/CMakeLists.txt +++ b/examples/ginkgo-ranges/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(ginkgo-ranges) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/heat-equation/CMakeLists.txt b/examples/heat-equation/CMakeLists.txt index f4790edaa8d..89dfb9e513b 100644 --- a/examples/heat-equation/CMakeLists.txt +++ b/examples/heat-equation/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(heat-equation) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/ilu-preconditioned-solver/CMakeLists.txt b/examples/ilu-preconditioned-solver/CMakeLists.txt index e6c840f38f8..0d1d215860e 100644 --- a/examples/ilu-preconditioned-solver/CMakeLists.txt +++ b/examples/ilu-preconditioned-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(ilu-preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/inverse-iteration/CMakeLists.txt b/examples/inverse-iteration/CMakeLists.txt index deb72accffd..c73da656587 100644 --- a/examples/inverse-iteration/CMakeLists.txt +++ b/examples/inverse-iteration/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(inverse-iteration) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/ir-ilu-preconditioned-solver/CMakeLists.txt b/examples/ir-ilu-preconditioned-solver/CMakeLists.txt index fc1205fbd0d..3a05cb56a81 100644 --- a/examples/ir-ilu-preconditioned-solver/CMakeLists.txt +++ b/examples/ir-ilu-preconditioned-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(ir-ilu-preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/iterative-refinement/CMakeLists.txt b/examples/iterative-refinement/CMakeLists.txt index fe94a94455b..f8c06ddcafa 100644 --- a/examples/iterative-refinement/CMakeLists.txt +++ b/examples/iterative-refinement/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(iterative-refinement) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/kokkos_assembly/CMakeLists.txt b/examples/kokkos_assembly/CMakeLists.txt index bfee201c91d..9e229c29f58 100644 --- a/examples/kokkos_assembly/CMakeLists.txt +++ b/examples/kokkos_assembly/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.16) project(kokkos-assembly CXX) # We only need to find Ginkgo if we build this example 
stand-alone diff --git a/examples/minimal-cuda-solver/CMakeLists.txt b/examples/minimal-cuda-solver/CMakeLists.txt index 3add4bb30ad..2d81e558eec 100644 --- a/examples/minimal-cuda-solver/CMakeLists.txt +++ b/examples/minimal-cuda-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(minimal-cuda-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/mixed-multigrid-preconditioned-solver/CMakeLists.txt b/examples/mixed-multigrid-preconditioned-solver/CMakeLists.txt index d710f10f146..a66a8410bfb 100644 --- a/examples/mixed-multigrid-preconditioned-solver/CMakeLists.txt +++ b/examples/mixed-multigrid-preconditioned-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(mixed-multigrid-preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/mixed-multigrid-solver/CMakeLists.txt b/examples/mixed-multigrid-solver/CMakeLists.txt index 17ec2fa398e..af73c94c334 100644 --- a/examples/mixed-multigrid-solver/CMakeLists.txt +++ b/examples/mixed-multigrid-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(mixed-multigrid-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/mixed-precision-ir/CMakeLists.txt b/examples/mixed-precision-ir/CMakeLists.txt index 01094a5376b..156ede4fe13 100644 --- a/examples/mixed-precision-ir/CMakeLists.txt +++ b/examples/mixed-precision-ir/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(mixed-precision-ir) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/mixed-spmv/CMakeLists.txt b/examples/mixed-spmv/CMakeLists.txt index 0e4378ca82f..2e2ed9bb074 100644 --- a/examples/mixed-spmv/CMakeLists.txt +++ b/examples/mixed-spmv/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(mixed-spmv) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/multigrid-preconditioned-solver-customized/CMakeLists.txt b/examples/multigrid-preconditioned-solver-customized/CMakeLists.txt index 411b57b2c83..99ba03167f5 100644 --- a/examples/multigrid-preconditioned-solver-customized/CMakeLists.txt +++ b/examples/multigrid-preconditioned-solver-customized/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(multigrid-preconditioned-solver-customized) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/multigrid-preconditioned-solver/CMakeLists.txt b/examples/multigrid-preconditioned-solver/CMakeLists.txt index 90277398b85..75c56b80062 100644 --- a/examples/multigrid-preconditioned-solver/CMakeLists.txt +++ b/examples/multigrid-preconditioned-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(multigrid-preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/nine-pt-stencil-solver/CMakeLists.txt b/examples/nine-pt-stencil-solver/CMakeLists.txt index 35610ba758a..511bb334d7c 100644 --- a/examples/nine-pt-stencil-solver/CMakeLists.txt +++ b/examples/nine-pt-stencil-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) 
+cmake_minimum_required(VERSION 3.16) project(nine-pt-stencil-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/papi-logging/CMakeLists.txt b/examples/papi-logging/CMakeLists.txt index 6927675e2ec..3695e12b814 100644 --- a/examples/papi-logging/CMakeLists.txt +++ b/examples/papi-logging/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(papi-logging) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/par-ilu-convergence/CMakeLists.txt b/examples/par-ilu-convergence/CMakeLists.txt index 23b7afd1e75..8679ccdf526 100644 --- a/examples/par-ilu-convergence/CMakeLists.txt +++ b/examples/par-ilu-convergence/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(par-ilu-convergence) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/performance-debugging/CMakeLists.txt b/examples/performance-debugging/CMakeLists.txt index 715cd99fe1b..7f6317a491f 100644 --- a/examples/performance-debugging/CMakeLists.txt +++ b/examples/performance-debugging/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(performance-debugging) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/poisson-solver/CMakeLists.txt b/examples/poisson-solver/CMakeLists.txt index bd5383876d5..83791b5cfda 100644 --- a/examples/poisson-solver/CMakeLists.txt +++ b/examples/poisson-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(poisson-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/preconditioned-solver/CMakeLists.txt b/examples/preconditioned-solver/CMakeLists.txt index a412885f219..b8d9bb8fc9f 100644 --- a/examples/preconditioned-solver/CMakeLists.txt +++ b/examples/preconditioned-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(preconditioned-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/preconditioner-export/CMakeLists.txt b/examples/preconditioner-export/CMakeLists.txt index 1cfd6d7ff84..83a20952d51 100644 --- a/examples/preconditioner-export/CMakeLists.txt +++ b/examples/preconditioner-export/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(preconditioner-export) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/schroedinger-splitting/CMakeLists.txt b/examples/schroedinger-splitting/CMakeLists.txt index 1e49a1f88b4..555fb59b554 100644 --- a/examples/schroedinger-splitting/CMakeLists.txt +++ b/examples/schroedinger-splitting/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(schroedinger-splitting) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/simple-solver-logging/CMakeLists.txt b/examples/simple-solver-logging/CMakeLists.txt index befead38e7d..2272413f52a 100644 --- a/examples/simple-solver-logging/CMakeLists.txt +++ b/examples/simple-solver-logging/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(simple-solver-logging) # We only need to find Ginkgo if we build this example 
stand-alone diff --git a/examples/simple-solver/CMakeLists.txt b/examples/simple-solver/CMakeLists.txt index dd0faec5f53..d2a30ac084f 100644 --- a/examples/simple-solver/CMakeLists.txt +++ b/examples/simple-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(simple-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/examples/three-pt-stencil-solver/CMakeLists.txt b/examples/three-pt-stencil-solver/CMakeLists.txt index fc0691dd7c9..164c9e08302 100644 --- a/examples/three-pt-stencil-solver/CMakeLists.txt +++ b/examples/three-pt-stencil-solver/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(three-pt-stencil-solver) # We only need to find Ginkgo if we build this example stand-alone diff --git a/test/test_exportbuild/CMakeLists.txt b/test/test_exportbuild/CMakeLists.txt index 52a8d3851cd..71633b91c35 100644 --- a/test/test_exportbuild/CMakeLists.txt +++ b/test/test_exportbuild/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(GinkgoExportBuildTest LANGUAGES CXX) find_package(Ginkgo REQUIRED) diff --git a/test/test_install/CMakeLists.txt b/test/test_install/CMakeLists.txt index a36a936e867..070410ec48c 100644 --- a/test/test_install/CMakeLists.txt +++ b/test/test_install/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(TestInstall LANGUAGES CXX) diff --git a/test/test_pkgconfig/CMakeLists.txt b/test/test_pkgconfig/CMakeLists.txt index 883ad134f05..e904f997f26 100644 --- a/test/test_pkgconfig/CMakeLists.txt +++ b/test/test_pkgconfig/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.16) project(GinkgoExportBuildWithPkgConfigTest LANGUAGES CXX) find_package(PkgConfig REQUIRED) diff --git a/test/test_subdir/CMakeLists.txt b/test/test_subdir/CMakeLists.txt index 2017b69366f..dcf846f4adc 100644 --- a/test/test_subdir/CMakeLists.txt +++ b/test/test_subdir/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.14) +cmake_minimum_required(VERSION 3.16) project(GinkgoSubdirTest LANGUAGES CXX) file(CREATE_LINK "${CMAKE_CURRENT_SOURCE_DIR}/../.." 
"${CMAKE_CURRENT_BINARY_DIR}/ginkgo" SYMBOLIC) From ed0a8b31eb6577fc2be8b4a70dc9b1ddf2f462eb Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 17 Jul 2023 11:00:50 +0200 Subject: [PATCH 154/583] find CUDAToolkit in installed ginkgo --- cmake/GinkgoConfig.cmake.in | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index a1e209a0c79..1ba77bd9f19 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -176,6 +176,7 @@ endif() # For details, see https://gitlab.kitware.com/cmake/cmake/issues/18614 if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_CUDA) enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) find_package(NVTX REQUIRED) endif() From 954f5340d971c791dd9a7c476a1919e19449bb74 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 17 Jul 2023 11:04:17 +0200 Subject: [PATCH 155/583] throw error on insufficient CMake version --- cuda/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 7cf9053e2cf..4c09bf96645 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -1,3 +1,4 @@ +cmake_minimum_required(VERSION 3.18 FATAL_ERROR) add_library(ginkgo_cuda $ "") include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) add_instantiation_files(. matrix/csr_kernels.instantiate.cu CSR_INSTANTIATE) From 37d09da5c8dad11aceb98a6425845927ae7db8a7 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 17 Jul 2023 11:05:52 +0200 Subject: [PATCH 156/583] fix version requirement --- cuda/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 4c09bf96645..358ce8092c0 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.18 FATAL_ERROR) +cmake_minimum_required(VERSION 3.17 FATAL_ERROR) add_library(ginkgo_cuda $ "") include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) add_instantiation_files(. matrix/csr_kernels.instantiate.cu CSR_INSTANTIATE) From fec9b94f8180c542caa6b8a6e13171e2752537fb Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 18 Jul 2023 22:43:19 +0200 Subject: [PATCH 157/583] set CUDA host compiler before enabling language Otherwise, the host compiler might not be used --- cmake/cuda.cmake | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 9d0b435be9f..20e734f2d9f 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -1,3 +1,9 @@ +if (NOT CMAKE_CUDA_HOST_COMPILER AND NOT GINKGO_CUDA_DEFAULT_HOST_COMPILER) + set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "" FORCE) +elseif(GINKGO_CUDA_DEFAULT_HOST_COMPILER) + unset(CMAKE_CUDA_HOST_COMPILER CACHE) +endif() + enable_language(CUDA) find_package(CUDAToolkit REQUIRED) @@ -11,12 +17,6 @@ cas_variable_cuda_architectures(GINKGO_CUDA_ARCH_FLAGS find_package(NVTX REQUIRED) -if (NOT CMAKE_CUDA_HOST_COMPILER AND NOT GINKGO_CUDA_DEFAULT_HOST_COMPILER) - set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "" FORCE) -elseif(GINKGO_CUDA_DEFAULT_HOST_COMPILER) - unset(CMAKE_CUDA_HOST_COMPILER CACHE) -endif() - if(CMAKE_CUDA_HOST_COMPILER AND NOT CMAKE_CXX_COMPILER STREQUAL CMAKE_CUDA_HOST_COMPILER) message(WARNING "The CMake CXX compiler and CUDA host compiler do not match. 
" "If you encounter any build error, especially while linking, try to use " From 25b24ca0dd1250dbe5c4e8cdc3efdcf862aa661f Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 7 Aug 2023 15:37:51 +0200 Subject: [PATCH 158/583] review updates - Set host compiler via environment variable - update CMake version requirement with CUDA to 3.18 - remove RPATH for CUDA Co-authored-by: Marcel Koch --- .gitlab/scripts.yml | 11 +++++------ CMakeLists.txt | 8 -------- README.md | 3 ++- benchmark/CMakeLists.txt | 2 -- benchmark/utils/cuda_linops.cpp | 2 -- cmake/GinkgoConfig.cmake.in | 12 +----------- cmake/cuda.cmake | 7 ------- cmake/hip.cmake | 5 ----- cuda/CMakeLists.txt | 13 ++----------- cuda/get_info.cmake | 2 +- cuda/test/solver/CMakeLists.txt | 4 ++-- 11 files changed, 13 insertions(+), 56 deletions(-) diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index 4f699cb53fc..becf0ed5b8d 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -22,8 +22,7 @@ script: - mkdir -p ${CI_JOB_NAME} && cd ${CI_JOB_NAME} - if [ -n "${CUDA_ARCH}" ]; then - CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; - CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER}); + export CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; fi - if [[ "${MPI_AS_ROOT}" == "ON" ]];then export OMPI_ALLOW_RUN_AS_ROOT=1; @@ -32,12 +31,12 @@ - if [[ "${BUILD_MPI}" == "ON" ]]; then MPI_STR=-DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX}; fi + - export CC=${C_COMPILER} CXX=${CXX_COMPILER} CUDAHOSTCXX=${CXX_COMPILER} CUDACXX=${CUDA_COMPILER} - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} -GNinja - -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} - -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} - ${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} ${CUDA_HOST_STR} + ${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} -DGINKGO_COMPILER_FLAGS=${GKO_COMPILER_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} @@ -65,7 +64,6 @@ - mkdir -p ${CI_JOB_NAME} && cd ${CI_JOB_NAME} - if [ -n "${CUDA_ARCH}" ]; then CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; - CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER}); fi - if [ -n "${SYCL_DEVICE_TYPE}" ]; then export SYCL_DEVICE_TYPE; fi - if [ -n "${SYCL_DEVICE_FILTER}" ]; then export SYCL_DEVICE_FILTER; fi @@ -77,6 +75,7 @@ - if [[ "${BUILD_MPI}" == "ON" ]]; then MPI_STR=-DGINKGO_MPI_EXEC_SUFFIX=${MPI_SUFFIX}; fi + - export CC=${C_COMPILER} CXX=${CXX_COMPILER} CUDAHOSTCXX=${CXX_COMPILER} CUDACXX=${CUDA_COMPILER} - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} -GNinja -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} diff --git a/CMakeLists.txt b/CMakeLists.txt index 89c2b65d74b..195a6a1df69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,12 +1,5 @@ cmake_minimum_required(VERSION 3.16) -# Let CAS handle the CUDA architecture flags (for now) -# Windows still gives CMP0104 warning if putting it in cuda. 
-if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) - cmake_policy(SET CMP0104 OLD) -endif() - - project(Ginkgo LANGUAGES C CXX VERSION 1.7.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures") set(Ginkgo_VERSION_TAG "master") set(PROJECT_VERSION_TAG ${Ginkgo_VERSION_TAG}) @@ -67,7 +60,6 @@ set(GINKGO_CUDA_COMPILER_FLAGS "" CACHE STRING "Set the required NVCC compiler flags, mainly used for warnings. Current default is an empty string") set(GINKGO_CUDA_ARCHITECTURES "Auto" CACHE STRING "A list of target NVIDIA GPU architectures. See README.md for more detail.") -option(GINKGO_CUDA_DEFAULT_HOST_COMPILER "Tell Ginkgo to not automatically set the CUDA host compiler" OFF) # the details of fine/coarse grain memory and unsafe atomic are available https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#floating-point-fp-atomic-operations-and-coarse-fine-grained-memory-allocations option(GINKGO_HIP_AMD_UNSAFE_ATOMIC "Compiler uses unsafe floating point atomic (only for AMD GPU and ROCM >= 5). Default is ON because we use hipMalloc, which is always on coarse grain. Must turn off when allocating memory on fine grain" ON) set(GINKGO_HIP_COMPILER_FLAGS "" CACHE STRING diff --git a/README.md b/README.md index ba9082839bd..b3b7d8660b7 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Prerequisites For Ginkgo core library: -* _cmake 3.13+_ +* _cmake 3.16+_ * C++14 compliant compiler, one of: * _gcc 5.5+_ * _clang 3.9+_ @@ -47,6 +47,7 @@ For Ginkgo core library: The Ginkgo CUDA module has the following __additional__ requirements: +* _cmake 3.18+_ * _CUDA 10.1+_ or _NVHPC Package 22.7+_ * Any host compiler restrictions your version of CUDA may impose also apply here. For the newest CUDA version, this information can be found in the diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index f29620fac41..641c6f363ec 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -21,8 +21,6 @@ function(ginkgo_benchmark_cusparse_linops type def) # make the dependency public to catch issues target_compile_definitions(cusparse_linops_${type} PUBLIC ${def}) target_link_libraries(cusparse_linops_${type} Ginkgo::ginkgo CUDA::cudart CUDA::cublas CUDA::cusparse) - target_include_directories(cusparse_linops_${type} SYSTEM PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) - target_compile_definitions(cusparse_linops_${type} PRIVATE ALLOWMP=1) endfunction() function(ginkgo_benchmark_hipsparse_linops type def) diff --git a/benchmark/utils/cuda_linops.cpp b/benchmark/utils/cuda_linops.cpp index dd1dda5c774..e2221614d9c 100644 --- a/benchmark/utils/cuda_linops.cpp +++ b/benchmark/utils/cuda_linops.cpp @@ -438,9 +438,7 @@ class CusparseCsrEx trans_(CUSPARSE_OPERATION_NON_TRANSPOSE), buffer_(exec) { -#ifdef ALLOWMP algmode_ = CUSPARSE_ALG_MERGE_PATH; -#endif // ALLOWMP } private: diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 1ba77bd9f19..093690e16f8 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -62,7 +62,7 @@ set(GINKGO_IWYU_PATH @GINKGO_IWYU_PATH@) set(GINKGO_JACOBI_FULL_OPTIMIZATIONS @GINKGO_JACOBI_FULL_OPTIMIZATIONS@) set(GINKGO_CUDA_ARCHITECTURES "@GINKGO_CUDA_ARCHITECTURES@") -set(GINKGO_CUDA_DEFAULT_HOST_COMPILER @GINKGO_CUDA_DEFAULT_HOST_COMPILER@) +set(GINKGO_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@") set(GINKGO_CUDA_ARCH_FLAGS "@GINKGO_CUDA_ARCH_FLAGS@") set(GINKGO_HIP_COMPILER_FLAGS "@GINKGO_HIP_COMPILER_FLAGS@") @@ -144,16 +144,6 @@ set(VTune_PATH "@VTune_PATH@") # NOTE: we do not export 
benchmarks, examples, tests or devel tools # so `third_party` libraries are currently unneeded. -# propagate CUDA_HOST_COMPILER if needed -if (GINKGO_BUILD_CUDA OR (GINKGO_BUILD_HIP - AND GINKGO_HIP_PLATFORM MATCHES "${GINKGO_HIP_PLATFORM_NVIDIA_REGEX}")) - if (GINKGO_CUDA_HOST_COMPILER AND NOT CMAKE_CUDA_HOST_COMPILER - AND EXISTS "${GINKGO_CUDA_HOST_COMPILER}") - message(STATUS "Ginkgo: Setting CUDA host compiler to ${GINKGO_CUDA_HOST_COMPILER}") - set(CMAKE_CUDA_HOST_COMPILER "${GINKGO_CUDA_HOST_COMPILER}" CACHE STRING "" FORCE) - endif() -endif() - if(GINKGO_HAVE_PAPI_SDE) find_package(PAPI REQUIRED OPTIONAL_COMPONENTS sde) endif() diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 20e734f2d9f..378003df50d 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -1,9 +1,3 @@ -if (NOT CMAKE_CUDA_HOST_COMPILER AND NOT GINKGO_CUDA_DEFAULT_HOST_COMPILER) - set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "" FORCE) -elseif(GINKGO_CUDA_DEFAULT_HOST_COMPILER) - unset(CMAKE_CUDA_HOST_COMPILER CACHE) -endif() - enable_language(CUDA) find_package(CUDAToolkit REQUIRED) @@ -24,4 +18,3 @@ if(CMAKE_CUDA_HOST_COMPILER AND NOT CMAKE_CXX_COMPILER STREQUAL CMAKE_CUDA_HOST_ "The CXX compiler is ${CMAKE_CXX_COMPILER} with version ${CMAKE_CXX_COMPILER_VERSION}.\n" "The CUDA host compiler is ${CMAKE_CUDA_HOST_COMPILER}.") endif() - diff --git a/cmake/hip.cmake b/cmake/hip.cmake index bb141450b25..e1897b42c9c 100644 --- a/cmake/hip.cmake +++ b/cmake/hip.cmake @@ -180,11 +180,6 @@ endif() set(GINKGO_HIP_NVCC_ARCH "") if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") - if (NOT CMAKE_CUDA_HOST_COMPILER AND NOT GINKGO_CUDA_DEFAULT_HOST_COMPILER) - set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "" FORCE) - elseif(GINKGO_CUDA_DEFAULT_HOST_COMPILER) - unset(CMAKE_CUDA_HOST_COMPILER CACHE) - endif() if (CMAKE_CUDA_HOST_COMPILER) list(APPEND GINKGO_HIP_NVCC_ADDITIONAL_FLAGS "-ccbin=${CMAKE_CUDA_HOST_COMPILER}") endif() diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 358ce8092c0..764f47afb83 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.17 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18 FATAL_ERROR) add_library(ginkgo_cuda $ "") include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) add_instantiation_files(. 
matrix/csr_kernels.instantiate.cu CSR_INSTANTIATE) @@ -115,7 +115,6 @@ target_compile_options(ginkgo_cuda PRIVATE $<$:${GINKGO_C target_compile_options(ginkgo_cuda PRIVATE $<$:${GINKGO_COMPILER_FLAGS}>) ginkgo_compile_features(ginkgo_cuda) target_compile_definitions(ginkgo_cuda PRIVATE GKO_COMPILING_CUDA) -target_include_directories(ginkgo_cuda SYSTEM PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) # include path for generated headers like jacobi_common.hpp target_include_directories(ginkgo_cuda @@ -123,17 +122,9 @@ target_include_directories(ginkgo_cuda target_link_libraries(ginkgo_cuda PRIVATE CUDA::cudart CUDA::cublas CUDA::cusparse CUDA::curand CUDA::cufft nvtx::nvtx) # NVTX3 is header-only and requires dlopen/dlclose in static builds target_link_libraries(ginkgo_cuda PUBLIC ginkgo_device ${CMAKE_DL_LIBS}) -target_compile_options(ginkgo_cuda - PRIVATE "$<$:${GINKGO_CUDA_ARCH_FLAGS}>") -# we handle CUDA architecture flags for now, disable CMake handling -if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) - set_target_properties(ginkgo_cuda PROPERTIES CUDA_ARCHITECTURES OFF) -endif() -list(GET CUDA_RUNTIME_LIBS 0 CUDA_FIRST_LIB) -get_filename_component(GKO_CUDA_LIBDIR "${CUDA_FIRST_LIB}" DIRECTORY) ginkgo_default_includes(ginkgo_cuda) -ginkgo_install_library(ginkgo_cuda "${CUDAToolkit_LIBRARY_DIR}") +ginkgo_install_library(ginkgo_cuda) if (GINKGO_CHECK_CIRCULAR_DEPS) ginkgo_check_headers(ginkgo_cuda GKO_COMPILING_CUDA) diff --git a/cuda/get_info.cmake b/cuda/get_info.cmake index 7955c3f636b..6d9b6c1f4d0 100644 --- a/cuda/get_info.cmake +++ b/cuda/get_info.cmake @@ -1,11 +1,11 @@ ginkgo_print_module_header(${detailed_log} "CUDA") ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_ARCHITECTURES") ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_COMPILER_FLAGS") -ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_DEFAULT_HOST_COMPILER") ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_ARCH_FLAGS") ginkgo_print_module_footer(${detailed_log} "CUDA variables:") ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_COMPILER") ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_COMPILER_VERSION") ginkgo_print_flags(${detailed_log} "CMAKE_CUDA_FLAGS") ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_HOST_COMPILER") +ginkgo_print_variable(${detailed_log} "CUDAToolkit_LIBRARY_DIR") ginkgo_print_module_footer(${detailed_log} "") diff --git a/cuda/test/solver/CMakeLists.txt b/cuda/test/solver/CMakeLists.txt index 65187e68e1b..f8cd67c1241 100644 --- a/cuda/test/solver/CMakeLists.txt +++ b/cuda/test/solver/CMakeLists.txt @@ -1,2 +1,2 @@ -ginkgo_create_test(lower_trs_kernels ADDITIONAL_INCLUDES ${CUDAToolkit_INCLUDE_DIRS}) -ginkgo_create_test(upper_trs_kernels ADDITIONAL_INCLUDES ${CUDAToolkit_INCLUDE_DIRS}) +ginkgo_create_cuda_test(lower_trs_kernels) +ginkgo_create_cuda_test(upper_trs_kernels) From 6a12cac0169709a45b347411303b80e8caba79fa Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 9 Aug 2023 20:51:04 +0200 Subject: [PATCH 159/583] fix test names --- cuda/test/solver/{lower_trs_kernels.cpp => lower_trs_kernels.cu} | 0 cuda/test/solver/{upper_trs_kernels.cpp => upper_trs_kernels.cu} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename cuda/test/solver/{lower_trs_kernels.cpp => lower_trs_kernels.cu} (100%) rename cuda/test/solver/{upper_trs_kernels.cpp => upper_trs_kernels.cu} (100%) diff --git a/cuda/test/solver/lower_trs_kernels.cpp b/cuda/test/solver/lower_trs_kernels.cu similarity index 100% rename from cuda/test/solver/lower_trs_kernels.cpp rename to cuda/test/solver/lower_trs_kernels.cu diff 
--git a/cuda/test/solver/upper_trs_kernels.cpp b/cuda/test/solver/upper_trs_kernels.cu similarity index 100% rename from cuda/test/solver/upper_trs_kernels.cpp rename to cuda/test/solver/upper_trs_kernels.cu From 9245700aecfbb220584d2593c374c59ba76774a6 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 9 Aug 2023 20:52:25 +0200 Subject: [PATCH 160/583] set CMAKE_CUDA_ARCHITECTURES from CAS --- cmake/GinkgoConfig.cmake.in | 3 +- cmake/Modules/CudaArchitectureSelector.cmake | 36 ++++++++++++++++++++ cmake/create_test.cmake | 1 - cmake/cuda.cmake | 9 ++--- cuda/get_info.cmake | 3 +- examples/custom-matrix-format/CMakeLists.txt | 8 ----- test/test_install/CMakeLists.txt | 11 +----- 7 files changed, 44 insertions(+), 27 deletions(-) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 093690e16f8..13888ae0b10 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -61,9 +61,8 @@ set(GINKGO_IWYU_PATH @GINKGO_IWYU_PATH@) set(GINKGO_JACOBI_FULL_OPTIMIZATIONS @GINKGO_JACOBI_FULL_OPTIMIZATIONS@) -set(GINKGO_CUDA_ARCHITECTURES "@GINKGO_CUDA_ARCHITECTURES@") +set(GINKGO_CUDA_ARCHITECTURES "@CMAKE_CUDA_ARCHITECTURES@") set(GINKGO_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@") -set(GINKGO_CUDA_ARCH_FLAGS "@GINKGO_CUDA_ARCH_FLAGS@") set(GINKGO_HIP_COMPILER_FLAGS "@GINKGO_HIP_COMPILER_FLAGS@") set(GINKGO_HIP_HCC_COMPILER_FLAGS "@GINKGO_HIP_HCC_COMPILER_FLAGS@") diff --git a/cmake/Modules/CudaArchitectureSelector.cmake b/cmake/Modules/CudaArchitectureSelector.cmake index 1838ed4b932..017fd2f0f1d 100644 --- a/cmake/Modules/CudaArchitectureSelector.cmake +++ b/cmake/Modules/CudaArchitectureSelector.cmake @@ -65,6 +65,15 @@ # The command has the same result as ``cas_target_cuda_architectures``. It does # not add the compiler flags to the target, but stores the compiler flags in # the variable (string). +# +# cas_variable_cmake_cuda_architectures( +# [] # variable for storing architecture list +# [] # list of architecture specifications +# ) +# +# The command prepares an architecture list supported by the CMake +# ``CUDA_ARCHITECTURES`` target property and ``CMAKE_CUDA_ARCHITECTURES`` +# variable. 
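As an illustration of the new command, a stand-alone use could look like the following sketch; the variable name is arbitrary, "Auto" is just one possible specification, and the guard mirrors how the cmake/cuda.cmake hunk later in this patch consumes the result:

    include(cmake/Modules/CudaArchitectureSelector.cmake)
    # translate a CAS-style specification ("Auto", "All" or explicit entries
    # such as "60" or "Volta") into the format expected by CMAKE_CUDA_ARCHITECTURES
    cas_variable_cmake_cuda_architectures(detected_archs "Auto")
    if(NOT CMAKE_CUDA_ARCHITECTURES)
        set(CMAKE_CUDA_ARCHITECTURES "${detected_archs}")
    endif()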
The architecture specification # # # ``ARCHITECTURES`` specification list @@ -404,3 +413,30 @@ function(cas_variable_cuda_architectures variable) cas_get_compiler_flags(flags ${ARGN}) set(${variable} "${flags}" PARENT_SCOPE) endfunction() + + +function(cas_variable_cmake_cuda_architectures variable) + cas_get_onboard_architectures(onboard_archs) + cas_get_supported_architectures(supported_archs) + if(("${ARGN}" STREQUAL "All") OR ("${ARGN}" STREQUAL "Auto" AND (NOT onboard_archs))) + set(archs "${supported_archs}") + elseif("${ARGN}" STREQUAL "Auto") + set(archs "${onboard_archs}") + else() + set(archs) + foreach(arch IN LISTS ARGN) + if(arch MATCHES "${cas_spec_regex}") + if(CMAKE_MATCH_1) + list(APPEND archs ${CMAKE_MATCH_1}-real) + endif() + if(CMAKE_MATCH_3) + list(APPEND archs ${CMAKE_MATCH_3}-virtual) + endif() + else() + cas_get_architectures_by_name("${arch}" arch) + list(APPEND archs ${arch}) + endif() + endforeach() + endif() + set("${variable}" "${archs}" PARENT_SCOPE) +endfunction() diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 3fbafe35858..58a49ca066c 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -119,7 +119,6 @@ function(ginkgo_create_cuda_test_internal test_name filename test_target_name) target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_CUDA) target_compile_options(${test_target_name} PRIVATE - $<$:${GINKGO_CUDA_ARCH_FLAGS}> $<$:${GINKGO_CUDA_COMPILER_FLAGS}>) if(MSVC) target_compile_options(${test_target_name} diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 378003df50d..33d785b8c52 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -4,10 +4,11 @@ find_package(CUDAToolkit REQUIRED) include(cmake/Modules/CudaArchitectureSelector.cmake) -# Detect the CUDA architecture flags and propagate to all the project -cas_variable_cuda_architectures(GINKGO_CUDA_ARCH_FLAGS - ARCHITECTURES ${GINKGO_CUDA_ARCHITECTURES} - UNSUPPORTED "20" "21") +# Detect the CUDA architecture and propagate to all the project +cas_variable_cmake_cuda_architectures(cuda_detected_archs ${GINKGO_CUDA_ARCHITECTURES}) +if(NOT CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES "${cuda_detected_archs}") +endif() find_package(NVTX REQUIRED) diff --git a/cuda/get_info.cmake b/cuda/get_info.cmake index 6d9b6c1f4d0..eeadaf9725c 100644 --- a/cuda/get_info.cmake +++ b/cuda/get_info.cmake @@ -1,7 +1,6 @@ ginkgo_print_module_header(${detailed_log} "CUDA") -ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_ARCHITECTURES") +ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_ARCHITECTURES") ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_COMPILER_FLAGS") -ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_ARCH_FLAGS") ginkgo_print_module_footer(${detailed_log} "CUDA variables:") ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_COMPILER") ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_COMPILER_VERSION") diff --git a/examples/custom-matrix-format/CMakeLists.txt b/examples/custom-matrix-format/CMakeLists.txt index 26034b7dce2..0f7c5f2f32c 100644 --- a/examples/custom-matrix-format/CMakeLists.txt +++ b/examples/custom-matrix-format/CMakeLists.txt @@ -18,13 +18,5 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) add_executable(custom-matrix-format custom-matrix-format.cpp stencil_kernel.cu) target_link_libraries(custom-matrix-format Ginkgo::ginkgo OpenMP::OpenMP_CXX) -# inherit CUDA architecture flags from Ginkgo -target_compile_options(custom-matrix-format - PRIVATE "$<$:${GINKGO_CUDA_ARCH_FLAGS}>") -# we handle CUDA architecture flags for now, 
disable CMake handling -if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) - set_target_properties(custom-matrix-format PROPERTIES CUDA_ARCHITECTURES OFF) -endif() - # workaround for clang-cuda/g++ interaction set_target_properties(custom-matrix-format PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/test/test_install/CMakeLists.txt b/test/test_install/CMakeLists.txt index 070410ec48c..513af67e923 100644 --- a/test/test_install/CMakeLists.txt +++ b/test/test_install/CMakeLists.txt @@ -38,12 +38,6 @@ if(GINKGO_BUILD_CUDA) enable_language(CUDA) configure_file(test_install.cpp test_install.cu COPYONLY) add_executable(test_install_cuda ${CMAKE_CURRENT_BINARY_DIR}/test_install.cu) - target_compile_options(test_install_cuda - PRIVATE "$<$:${GINKGO_CUDA_ARCH_FLAGS}>") - # we handle CUDA architecture flags for now, disable CMake handling - if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) - set_target_properties(test_install_cuda PROPERTIES CUDA_ARCHITECTURES OFF) - endif() target_compile_definitions(test_install_cuda PRIVATE HAS_CUDA=1) target_compile_definitions(test_install_cuda PRIVATE HAS_REFERENCE=${HAS_REFERENCE}) target_link_libraries(test_install_cuda PRIVATE Ginkgo::ginkgo) @@ -60,16 +54,13 @@ if(GINKGO_BUILD_HIP) else() set (GINKGO_PIC_OPTION "$<$:-fPIC>") endif() - if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") - set(TESTINSTALL_CUDA_ARCH_FLAGS "${GINKGO_CUDA_ARCH_FLAGS}") - endif() if (CMAKE_CUDA_HOST_COMPILER) set(TESTINSTALL_CUDA_HOST_COMPILER "-ccbin=${CMAKE_CUDA_HOST_COMPILER}") endif() hip_add_executable(test_install_hip test_install.cpp HIPCC_OPTIONS "-std=c++14" CLANG_OPTIONS "${GINKGO_PIC_OPTION}" - NVCC_OPTIONS "${GINKGO_CUDA_PIC_OPTION}" "${TESTINSTALL_CUDA_ARCH_FLAGS}" "${TESTINSTALL_CUDA_HOST_COMPILER}") + NVCC_OPTIONS "${GINKGO_CUDA_PIC_OPTION}" "${TESTINSTALL_CUDA_HOST_COMPILER}") target_link_libraries(test_install_hip PRIVATE Ginkgo::ginkgo) target_compile_definitions(test_install_hip PRIVATE HAS_HIP=1) From 00e680d26bea29c79ec4a63c7a1ecc3a88be3291 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 10:06:48 +0200 Subject: [PATCH 161/583] update container names --- .gitlab-ci.yml | 6 +++--- .gitlab/image.yml | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 709f2b4f53a..c976e1b15da 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -339,7 +339,7 @@ build/cuda114/nompi/gcc/cuda/debug/shared: - .build_and_test_template - .default_variables - .quick_test_condition - - .use_gko_cuda114-openmpi-gnu11-llvm12 + - .use_gko_cuda114-openmpi-gnu10-llvm12 variables: BUILD_OMP: "ON" BUILD_CUDA: "ON" @@ -543,7 +543,7 @@ build/nocuda/nompi/gcc/omp/release/static: - .build_and_test_template - .default_variables - .quick_test_condition - - .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018 + - .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2019 variables: BUILD_OMP: "ON" BUILD_TYPE: "Release" @@ -554,7 +554,7 @@ build/nocuda-nomixed/nompi/clang/omp/release/static: - .build_and_test_template - .default_variables - .full_test_condition - - .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018 + - .use_gko-nocuda-mvapich2-gnu5-llvm39-intel2019 variables: C_COMPILER: "clang" CXX_COMPILER: "clang++" diff --git a/.gitlab/image.yml b/.gitlab/image.yml index cad06674aee..72fb51ad372 100644 --- a/.gitlab/image.yml +++ b/.gitlab/image.yml @@ -17,8 +17,8 @@ - cpu - amdci -.use_gko-nocuda-mvapich2-gnu5-llvm39-intel2018: - image: ginkgohub/cpu:mvapich2-gnu5-llvm39-intel2018 +.use_gko-nocuda-mvapich2-gnu5-llvm39-intel2019: + image: 
ginkgohub/cpu:mvapich2-gnu5-llvm39-intel2019 tags: - private_ci - cpu @@ -50,8 +50,8 @@ - private_ci - horeka -.use_gko_cuda114-openmpi-gnu11-llvm12: - image: ginkgohub/cuda:114-openmpi-gnu11-llvm12 +.use_gko_cuda114-openmpi-gnu10-llvm12: + image: ginkgohub/cuda:114-openmpi-gnu10-llvm12 tags: - private_ci - nvidia-gpu From a8985cc0fa9739c58311861c4f48f3bce87f6a4e Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 13:48:02 +0200 Subject: [PATCH 162/583] remove duplicate pipeline --- .gitlab-ci.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c976e1b15da..ae7fa86fd38 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -526,17 +526,6 @@ build/nocuda/openmpi/clang/omp/debug/static: FAST_TESTS: "ON" BUILD_SHARED_LIBS: "OFF" -test/nocuda/openmpi/clang/omp/debug/static: - extends: - - .build_and_test_template - - .default_variables - - .full_test_condition - - .use_gko-nocuda-openmpi-gnu9-llvm8 - variables: - USE_NAME: "nocuda-openmpi-clang-${CI_PIPELINE_ID}" - dependencies: null - needs: [ "build/nocuda/openmpi/clang/omp/debug/static" ] - # nocuda with the oldest supported compiler build/nocuda/nompi/gcc/omp/release/static: extends: From 0b7b439e692e7539d50b68e078623abbe8f2c7ae Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 13:48:35 +0200 Subject: [PATCH 163/583] adapt remaining CMake flags --- .gitlab/scripts.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index becf0ed5b8d..5cd36de0b9f 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -63,7 +63,7 @@ script: - mkdir -p ${CI_JOB_NAME} && cd ${CI_JOB_NAME} - if [ -n "${CUDA_ARCH}" ]; then - CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; + export CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH}; fi - if [ -n "${SYCL_DEVICE_TYPE}" ]; then export SYCL_DEVICE_TYPE; fi - if [ -n "${SYCL_DEVICE_FILTER}" ]; then export SYCL_DEVICE_FILTER; fi @@ -77,11 +77,9 @@ fi - export CC=${C_COMPILER} CXX=${CXX_COMPILER} CUDAHOSTCXX=${CXX_COMPILER} CUDACXX=${CUDA_COMPILER} - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX} - -GNinja - -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER} - -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} + -GNinja -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} - ${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} ${CUDA_HOST_STR} + ${EXTRA_CMAKE_FLAGS} ${CUDA_ARCH_STR} -DGINKGO_COMPILER_FLAGS=${GKO_COMPILER_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} From e2009071896673bafd59c3c73913639fc4a79b9d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 13:57:44 +0200 Subject: [PATCH 164/583] review updates - Only detect available GPUs if requested - Remove unnecessary include paths - Remove unnecessary config variables Co-authored-by: Marcel Koch Co-authored-by: Terry Cojean --- benchmark/CMakeLists.txt | 1 - cmake/GinkgoConfig.cmake.in | 1 - cmake/Modules/CudaArchitectureSelector.cmake | 10 +++++++--- cmake/cuda.cmake | 5 ++--- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 641c6f363ec..44a0a3d1d9e 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -121,7 +121,6 @@ if (GINKGO_BUILD_CUDA) ginkgo_benchmark_cusparse_linops(c 
GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) add_library(cuda_timer utils/cuda_timer.cpp) target_link_libraries(cuda_timer ginkgo CUDA::cudart) - target_include_directories(cuda_timer SYSTEM PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) endif() if (GINKGO_BUILD_HIP) ginkgo_benchmark_hipsparse_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 13888ae0b10..f4eace2fdbc 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -121,7 +121,6 @@ set(GINKGO_INTERFACE_CXX_FLAGS "@GINKGO_INTERFACE_CXX_FLAGS@") set(GINKGO_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@") set(GINKGO_CUDA_COMPILER_VERSION @CMAKE_CUDA_COMPILER_VERSION@) set(GINKGO_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@") -set(GINKGO_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CUDAToolkit_INCLUDE_DIRS@") set(GINKGO_CUDA_FLAGS "@CMAKE_CUDA_FLAGS_MODIFY@") set(GINKGO_CUDA_FLAGS_DEBUG "@CMAKE_CUDA_FLAGS_DEBUG_MODIFY@") diff --git a/cmake/Modules/CudaArchitectureSelector.cmake b/cmake/Modules/CudaArchitectureSelector.cmake index 017fd2f0f1d..f863b144ab7 100644 --- a/cmake/Modules/CudaArchitectureSelector.cmake +++ b/cmake/Modules/CudaArchitectureSelector.cmake @@ -416,12 +416,16 @@ endfunction() function(cas_variable_cmake_cuda_architectures variable) - cas_get_onboard_architectures(onboard_archs) cas_get_supported_architectures(supported_archs) - if(("${ARGN}" STREQUAL "All") OR ("${ARGN}" STREQUAL "Auto" AND (NOT onboard_archs))) + if("${ARGN}" STREQUAL "All") set(archs "${supported_archs}") elseif("${ARGN}" STREQUAL "Auto") - set(archs "${onboard_archs}") + cas_get_onboard_architectures(onboard_archs) + if (onboard_archs) + set(archs "${onboard_archs}") + else() + set(archs "${supported_archs}") + endif() else() set(archs) foreach(arch IN LISTS ARGN) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 33d785b8c52..2e1c82db6b0 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -4,10 +4,9 @@ find_package(CUDAToolkit REQUIRED) include(cmake/Modules/CudaArchitectureSelector.cmake) -# Detect the CUDA architecture and propagate to all the project -cas_variable_cmake_cuda_architectures(cuda_detected_archs ${GINKGO_CUDA_ARCHITECTURES}) if(NOT CMAKE_CUDA_ARCHITECTURES) - set(CMAKE_CUDA_ARCHITECTURES "${cuda_detected_archs}") + # Detect the CUDA architecture and propagate it to the entire project + cas_variable_cmake_cuda_architectures(CMAKE_CUDA_ARCHITECTURES ${GINKGO_CUDA_ARCHITECTURES}) endif() find_package(NVTX REQUIRED) From 5c6ac16365063c0c619be4d5f42a15a859575031 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 14:41:54 +0200 Subject: [PATCH 165/583] temporarily disable PAPI --- CMakeLists.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 195a6a1df69..3886efb7c14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -191,10 +191,11 @@ check_include_file_cxx(cxxabi.h GKO_HAVE_CXXABI_H) # Automatically find PAPI and search for the required 'sde' component set(GINKGO_HAVE_PAPI_SDE 0) -find_package(PAPI OPTIONAL_COMPONENTS sde) -if(PAPI_sde_FOUND) - set(GINKGO_HAVE_PAPI_SDE 1) -endif() +# PAPI is temporarily disabled +#find_package(PAPI OPTIONAL_COMPONENTS sde) +#if(PAPI_sde_FOUND) +# set(GINKGO_HAVE_PAPI_SDE 1) +#endif() # Automatically find TAU set(GINKGO_HAVE_TAU 0) From f545ace68fb279f78956ccdeca34054af7c21c59 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 15:31:20 +0200 Subject: [PATCH 166/583] review updates - Bump CMake requirement in example - 
Add NVHPC CMake requirements to README Co-authored-by: Yuhsiang M. Tsai --- README.md | 2 +- examples/custom-matrix-format/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b3b7d8660b7..44428386b83 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ For Ginkgo core library: The Ginkgo CUDA module has the following __additional__ requirements: -* _cmake 3.18+_ +* _cmake 3.18+_ (If CUDA was installed through the NVIDIA HPC Toolkit, we require _cmake 3.22+_) * _CUDA 10.1+_ or _NVHPC Package 22.7+_ * Any host compiler restrictions your version of CUDA may impose also apply here. For the newest CUDA version, this information can be found in the diff --git a/examples/custom-matrix-format/CMakeLists.txt b/examples/custom-matrix-format/CMakeLists.txt index 0f7c5f2f32c..b5182fb6bbc 100644 --- a/examples/custom-matrix-format/CMakeLists.txt +++ b/examples/custom-matrix-format/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.18) project(custom-matrix-format CXX CUDA) # We only need to find Ginkgo if we build this example stand-alone From 64378eadbec004680e0663280c36b0d5f5802e4c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 18:32:36 +0200 Subject: [PATCH 167/583] collect build time statistics --- .gitlab/scripts.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index 5cd36de0b9f..cf6baad6fab 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -52,6 +52,7 @@ -DGINKGO_DPCPP_SINGLE_MODE=${DPCPP_SINGLE_MODE} -DGINKGO_EXPORT_BUILD_DIR=${EXPORT_BUILD_DIR} - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install + - awk '!/^#/ { print ($2 - $1)/1000 " " $4 }' .ninja_log | sort -nr - if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi - LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH ninja test_pkgconfig dependencies: [] @@ -94,6 +95,7 @@ -DGINKGO_RUN_EXAMPLES=${RUN_EXAMPLES} -DGINKGO_EXPORT_BUILD_DIR=${EXPORT_BUILD_DIR} - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install + - awk '!/^#/ { print ($2 - $1)/1000 " " $4 }' .ninja_log | sort -nr - | (( $(ctest -N | tail -1 | sed 's/Total Tests: //') != 0 )) || exit 1 - ctest -V --timeout 6000 From 84af8b24a83e81ed115e7f8faf84c8b76829b80a Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 18:33:17 +0200 Subject: [PATCH 168/583] split up mixed-precision builds for slow ROCm debug builds --- hip/matrix/csr_kernels.instantiate.hip.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/hip/matrix/csr_kernels.instantiate.hip.cpp b/hip/matrix/csr_kernels.instantiate.hip.cpp index 498f3ec1795..dcfa4c7b8c8 100644 --- a/hip/matrix/csr_kernels.instantiate.hip.cpp +++ b/hip/matrix/csr_kernels.instantiate.hip.cpp @@ -48,11 +48,15 @@ namespace csr { GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL); // split -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_SPMV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL, int32); // split -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL, int64); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, + int32); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, + int64); // split 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); // split From d95f4440c3cc14b0578820e3b90ea8c3ad38d142 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 18:46:16 +0200 Subject: [PATCH 169/583] split up HIP csr mixed precision instantiations --- core/base/mixed_precision_types.hpp | 83 ++++++++++++++-------- hip/matrix/csr_kernels.instantiate.hip.cpp | 56 +++++++++++++-- 2 files changed, 103 insertions(+), 36 deletions(-) diff --git a/core/base/mixed_precision_types.hpp b/core/base/mixed_precision_types.hpp index 84b0af21c5e..9579caaac4f 100644 --- a/core/base/mixed_precision_types.hpp +++ b/core/base/mixed_precision_types.hpp @@ -39,42 +39,65 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef GINKGO_MIXED_PRECISION -#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, ...) \ - template _macro(float, float, float, __VA_ARGS__); \ - template _macro(float, float, double, __VA_ARGS__); \ - template _macro(float, double, float, __VA_ARGS__); \ - template _macro(float, double, double, __VA_ARGS__); \ - template _macro(double, float, float, __VA_ARGS__); \ - template _macro(double, float, double, __VA_ARGS__); \ - template _macro(double, double, float, __VA_ARGS__); \ - template _macro(double, double, double, __VA_ARGS__); \ - template _macro(std::complex, std::complex, \ - std::complex, __VA_ARGS__); \ - template _macro(std::complex, std::complex, \ - std::complex, __VA_ARGS__); \ - template _macro(std::complex, std::complex, \ - std::complex, __VA_ARGS__); \ - template _macro(std::complex, std::complex, \ - std::complex, __VA_ARGS__); \ - template _macro(std::complex, std::complex, \ - std::complex, __VA_ARGS__); \ - template _macro(std::complex, std::complex, \ - std::complex, __VA_ARGS__); \ - template _macro(std::complex, std::complex, \ - std::complex, __VA_ARGS__); \ - template _macro(std::complex, std::complex, \ + +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(_macro, ...) \ + template _macro(float, float, float, __VA_ARGS__); \ + template _macro(float, float, double, __VA_ARGS__); \ + template _macro(float, double, float, __VA_ARGS__); \ + template _macro(float, double, double, __VA_ARGS__) + +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(_macro, ...) \ + template _macro(double, float, float, __VA_ARGS__); \ + template _macro(double, float, double, __VA_ARGS__); \ + template _macro(double, double, float, __VA_ARGS__); \ + template _macro(double, double, double, __VA_ARGS__) + +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(_macro, ...) \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__) + +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(_macro, ...) \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__); \ + template _macro(std::complex, std::complex, \ std::complex, __VA_ARGS__) + #else -#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, ...) 
\ - template _macro(float, float, float, __VA_ARGS__); \ - template _macro(double, double, double, __VA_ARGS__); \ - template _macro(std::complex, std::complex, \ - std::complex, __VA_ARGS__); \ - template _macro(std::complex, std::complex, \ + +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(_macro, ...) \ + template _macro(float, float, float, __VA_ARGS__); + +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(_macro, ...) \ + template _macro(double, double, double, __VA_ARGS__) + +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(_macro, ...) \ + template _macro(std::complex, std::complex, \ + std::complex, __VA_ARGS__) + +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(_macro, ...) \ + template _macro(std::complex, std::complex, \ std::complex, __VA_ARGS__) + #endif +#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, ...) \ + GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(_macro, __VA_ARGS__); \ + GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(_macro, __VA_ARGS__); \ + GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(_macro, __VA_ARGS__); \ + GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(_macro, __VA_ARGS__) + + #define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE(_macro) \ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, int32); \ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, int64) diff --git a/hip/matrix/csr_kernels.instantiate.hip.cpp b/hip/matrix/csr_kernels.instantiate.hip.cpp index dcfa4c7b8c8..9a6c29206de 100644 --- a/hip/matrix/csr_kernels.instantiate.hip.cpp +++ b/hip/matrix/csr_kernels.instantiate.hip.cpp @@ -47,16 +47,60 @@ namespace csr { // begin GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL); + + +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(GKO_DECLARE_CSR_SPMV_KERNEL, + int32); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(GKO_DECLARE_CSR_SPMV_KERNEL, + int32); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(GKO_DECLARE_CSR_SPMV_KERNEL, + int32); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(GKO_DECLARE_CSR_SPMV_KERNEL, + int32); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(GKO_DECLARE_CSR_SPMV_KERNEL, + int64); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(GKO_DECLARE_CSR_SPMV_KERNEL, + int64); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(GKO_DECLARE_CSR_SPMV_KERNEL, + int64); // split -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL, int32); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(GKO_DECLARE_CSR_SPMV_KERNEL, + int64); + + +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int32); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int32); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int32); // split -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL, int64); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int32); // split -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, - int32); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int64); // split -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, - int64); +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int64); +// split 
+GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int64); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int64); + + // split GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); // split From cd26e282027fe7da340582d8eb990c76b0b71263 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 20:34:04 +0200 Subject: [PATCH 170/583] allow specifying allocator for benchmarks --- benchmark/utils/general.hpp | 54 +++++++++++++++++++++++++++++--- core/device_hooks/cuda_hooks.cpp | 4 +++ core/device_hooks/hip_hooks.cpp | 3 ++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 5c6d849fe36..335ed687002 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -58,6 +59,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #include "benchmark/utils/json.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" @@ -69,6 +73,10 @@ DEFINE_string(executor, "reference", "The executor used to run the benchmarks, one of: reference, " "omp, cuda, hip"); +DEFINE_string(allocator, "default", + "The allocator used in the executor. Only relevant for CUDA and " + "HIP executors, one of: default, async, host, unified"); + DEFINE_uint32(device_id, 0, "ID of the device where to run the code"); DEFINE_bool(overwrite, false, @@ -329,6 +337,40 @@ void backup_results(rapidjson::Document& results) } +inline std::shared_ptr create_cuda_allocator() +{ + std::string flag{FLAGS_allocator}; + if (flag == "default") { + return std::make_shared(); + } else if (flag == "async") { + return std::make_shared(nullptr); + } else if (flag == "unified") { + return std::make_shared(FLAGS_device_id); + } else if (flag == "host") { + return std::make_shared(FLAGS_device_id); + } else { + throw std::runtime_error{"Unknown allocator type " + flag}; + } +} + + +inline std::shared_ptr create_hip_allocator() +{ + std::string flag{FLAGS_allocator}; + if (flag == "default") { + return std::make_shared(); + } else if (flag == "async") { + return std::make_shared(nullptr); + } else if (flag == "unified") { + return std::make_shared(FLAGS_device_id); + } else if (flag == "host") { + return std::make_shared(FLAGS_device_id); + } else { + throw std::runtime_error{"Unknown allocator type " + flag}; + } +} + + // executor mapping const std::map(bool)>> executor_factory{ @@ -337,12 +379,14 @@ const std::map(bool)>> {"cuda", [](bool) { return gko::CudaExecutor::create(FLAGS_device_id, - gko::OmpExecutor::create()); + gko::OmpExecutor::create(), + create_cuda_allocator()); }}, {"hip", [](bool) { return gko::HipExecutor::create(FLAGS_device_id, - gko::OmpExecutor::create()); + gko::OmpExecutor::create(), + create_hip_allocator()); }}, {"dpcpp", [](bool use_gpu_timer) { auto property = dpcpp_queue_property::in_order; @@ -369,14 +413,16 @@ const std::map Date: Thu, 10 Aug 2023 21:03:25 +0200 Subject: [PATCH 171/583] fix CUDA_VERSION availability --- cuda/base/memory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp index f605d9135ea..b5bfb14ac74 100644 --- a/cuda/base/memory.cpp +++ b/cuda/base/memory.cpp @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
OF SUCH DAMAGE. #include +#include #include From 41eb9d7aca3b3983279dbcb83ddebaea23666934 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 10 Aug 2023 21:46:26 +0200 Subject: [PATCH 172/583] increase repetitions for sparse_blas --- benchmark/sparse_blas/sparse_blas.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index cfa56ef81fe..d906e9f9e12 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -127,9 +127,12 @@ void apply_sparse_blas(const char* operation_name, allocator); auto gen_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - test_case[operation_name]["components"], allocator, 1); + test_case[operation_name]["components"], allocator, + repetitions); exec->add_logger(gen_logger); - op->run(); + for (unsigned i = 0; i < repetitions; i++) { + op->run(); + } exec->remove_logger(gen_logger); } op->write_stats(test_case[operation_name], allocator); From c79874e86576e3c999e80470a0212f149eeda764 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Thu, 6 Apr 2023 13:08:49 +0200 Subject: [PATCH 173/583] Update papi_sde to current status. Use externals. --- CMakeLists.txt | 20 +++- cmake/DownloadNonCMakeCMakeLists.txt.in | 23 ++++- cmake/get_info.cmake | 13 ++- cmake/information_helpers.cmake | 5 +- cmake/install_helpers.cmake | 5 - cmake/package_helpers.cmake | 3 +- core/CMakeLists.txt | 5 +- core/test/log/CMakeLists.txt | 2 +- core/test/log/papi.cpp | 6 +- include/ginkgo/core/log/papi.hpp | 33 ++++--- reference/test/log/CMakeLists.txt | 2 +- third_party/CMakeLists.txt | 4 + third_party/hwloc/CMakeLists.txt | 1 + third_party/papi_sde/CMakeLists.txt | 37 +++++++ third_party/papi_sde/papi_sde_interface.h | 113 ---------------------- 15 files changed, 119 insertions(+), 153 deletions(-) create mode 100644 third_party/papi_sde/CMakeLists.txt delete mode 100644 third_party/papi_sde/papi_sde_interface.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 3886efb7c14..6dc01ed27ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,6 +79,7 @@ if(MSVC OR WIN32 OR CYGWIN OR APPLE) else() option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is ON. If a system HWLOC is not found, then we try to build it ourselves. Switch this OFF to disable HWLOC." ON) endif() +option(GINKGO_BUILD_PAPI_SDE "Build Ginkgo with PAPI SDE. Default is OFF." OFF) option(GINKGO_DPCPP_SINGLE_MODE "Do not compile double kernels for the DPC++ backend." OFF) option(GINKGO_INSTALL_RPATH "Set the RPATH when installing its libraries." ON) option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) to the installation RPATH." 
ON) @@ -191,11 +192,10 @@ check_include_file_cxx(cxxabi.h GKO_HAVE_CXXABI_H) # Automatically find PAPI and search for the required 'sde' component set(GINKGO_HAVE_PAPI_SDE 0) -# PAPI is temporarily disabled -#find_package(PAPI OPTIONAL_COMPONENTS sde) -#if(PAPI_sde_FOUND) -# set(GINKGO_HAVE_PAPI_SDE 1) -#endif() +find_package(PAPI OPTIONAL_COMPONENTS sde) +if(PAPI_sde_FOUND) + set(GINKGO_HAVE_PAPI_SDE 1) +endif() # Automatically find TAU set(GINKGO_HAVE_TAU 0) @@ -232,6 +232,12 @@ else() set(GINKGO_HAVE_HWLOC 0) message(STATUS "HWLOC is being forcibly switched off") endif() +if(GINKGO_BUILD_PAPI_SDE) + set(GINKGO_HAVE_PAPI_SDE 1) +else() + set(GINKGO_HAVE_PAPI_SDE 0) + message(STATUS "PAPI SDE is being forcibly switched off") +endif() set(GINKGO_HAVE_GPU_AWARE_MPI OFF) set(GINKGO_FORCE_SPMV_BLOCKING_COMM OFF) @@ -272,6 +278,10 @@ endif() if(GINKGO_BUILD_HWLOC) find_package(HWLOC 2.1) # No need for QUIET as we ship FindHWLOC endif() +if(GINKGO_BUILD_PAPI_SDE) + # No need for QUIET as we ship FindPAPI + find_package(PAPI OPTIONAL_COMPONENTS sde) +endif() add_subdirectory(third_party) # Third-party tools and libraries if(MSVC) diff --git a/cmake/DownloadNonCMakeCMakeLists.txt.in b/cmake/DownloadNonCMakeCMakeLists.txt.in index bae2281e63b..55e2f833985 100644 --- a/cmake/DownloadNonCMakeCMakeLists.txt.in +++ b/cmake/DownloadNonCMakeCMakeLists.txt.in @@ -3,12 +3,25 @@ project(${package_name}) include(ExternalProject) ExternalProject_Add(${package_name} - URL "${package_url}" - URL_HASH "${package_hash}" + URL "${package_url}" + URL_HASH "${package_hash}" DOWNLOAD_NO_PROGRESS TRUE SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/src" - BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/build" - CONFIGURE_COMMAND "${config_command}" "${ARGN}" - INSTALL_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" UPDATE_DISCONNECTED ${GINKGO_SKIP_DEPENDENCY_UPDATE} ) + +ExternalProject_Add_Step(${package_name} custom_configure + COMMAND "${config_command}" "${ARGN}" + WORKING_DIRECTORY "${working_dir}" + DEPENDEES download) +ExternalProject_Add_Step(${package_name} custom_build + COMMAND make + WORKING_DIRECTORY "${working_dir}" + DEPENDEES custom_configure) +ExternalProject_Add_Step(${package_name} custom_install + COMMAND make all install + WORKING_DIRECTORY "${working_dir}" + DEPENDEES custom_build) diff --git a/cmake/get_info.cmake b/cmake/get_info.cmake index 2cf8dd06c3f..2dd068abb50 100644 --- a/cmake/get_info.cmake +++ b/cmake/get_info.cmake @@ -190,16 +190,21 @@ ginkgo_print_module_footer(${detailed_log} "") ginkgo_print_generic_header(${minimal_log} " Components:") ginkgo_print_generic_header(${detailed_log} " Components:") -if(PAPI_sde_FOUND) +ginkgo_print_variable(${minimal_log} "GINKGO_BUILD_PAPI_SDE") +ginkgo_print_variable(${detailed_log} "GINKGO_BUILD_PAPI_SDE") +if(TARGET PAPI::PAPI) ginkgo_print_variable(${detailed_log} "PAPI_VERSION") ginkgo_print_variable(${detailed_log} "PAPI_INCLUDE_DIR") ginkgo_print_flags(${detailed_log} "PAPI_LIBRARY") endif() + ginkgo_print_variable(${minimal_log} "GINKGO_BUILD_HWLOC") ginkgo_print_variable(${detailed_log} "GINKGO_BUILD_HWLOC") -ginkgo_print_variable(${detailed_log} "HWLOC_VERSION") -ginkgo_print_variable(${detailed_log} "HWLOC_LIBRARIES") -ginkgo_print_variable(${detailed_log} "HWLOC_INCLUDE_DIRS") +if(TARGET hwloc) + ginkgo_print_variable(${detailed_log} "HWLOC_VERSION") + ginkgo_print_variable(${detailed_log} "HWLOC_LIBRARIES") + ginkgo_print_variable(${detailed_log} "HWLOC_INCLUDE_DIRS") +endif() _minimal( " diff --git 
a/cmake/information_helpers.cmake b/cmake/information_helpers.cmake index 9a6a4481bf5..cef920a09ce 100644 --- a/cmake/information_helpers.cmake +++ b/cmake/information_helpers.cmake @@ -78,8 +78,9 @@ macro(ginkgo_interface_libraries_recursively INTERFACE_LIBS) list(TRANSFORM GINKGO_LIBS_INTERFACE_LIBS REPLACE "\\$" "\\1") ginkgo_interface_libraries_recursively("${GINKGO_LIBS_INTERFACE_LIBS}") elseif(EXISTS "${_libs}") - if ("${_libs}" MATCHES "${PROJECT_BINARY_DIR}.*hwloc.so") - list(APPEND GINKGO_INTERFACE_LIBS_FOUND "${CMAKE_INSTALL_FULL_LIBDIR}/libhwloc.so") + if ("${_libs}" MATCHES "${PROJECT_BINARY_DIR}.*(papi|sde|pfm|hwloc).so") + get_filename_component(_lib_name "${_libs}" NAME) + list(APPEND GINKGO_INTERFACE_LIBS_FOUND "${CMAKE_INSTALL_FULL_LIBDIR}/${_lib_name}") else() list(APPEND GINKGO_INTERFACE_LIBS_FOUND "${_libs}") endif() diff --git a/cmake/install_helpers.cmake b/cmake/install_helpers.cmake index 58cc730bb14..8bec34d7a41 100644 --- a/cmake/install_helpers.cmake +++ b/cmake/install_helpers.cmake @@ -80,11 +80,6 @@ function(ginkgo_install) install(FILES "${Ginkgo_BINARY_DIR}/include/ginkgo/config.hpp" DESTINATION "${CMAKE_INSTALL_FULL_INCLUDEDIR}/ginkgo" ) - if (GINKGO_HAVE_PAPI_SDE) - install(FILES "${Ginkgo_SOURCE_DIR}/third_party/papi_sde/papi_sde_interface.h" - DESTINATION "${CMAKE_INSTALL_FULL_INCLUDEDIR}/third_party/papi_sde" - ) - endif() if (GINKGO_HAVE_HWLOC AND NOT HWLOC_FOUND) get_filename_component(HWLOC_LIB_PATH ${HWLOC_LIBRARIES} DIRECTORY) diff --git a/cmake/package_helpers.cmake b/cmake/package_helpers.cmake index e1d196ad553..1abc1a72587 100644 --- a/cmake/package_helpers.cmake +++ b/cmake/package_helpers.cmake @@ -7,9 +7,10 @@ set(NON_CMAKE_PACKAGE_DOWNLOADER_SCRIPT # \param package_name Name of the package # \param package_url Url of the package # \param package_tag Tag or version of the package to be downloaded. +# \param working_dir The directory where the configure/build should happen. # \param config_command The command for the configuration step. 
# -function(ginkgo_load_and_configure_package package_name package_url package_hash config_command) +function(ginkgo_load_and_configure_package package_name package_url package_hash working_dir config_command) set(GINKGO_THIRD_PARTY_BUILD_TYPE "Debug") if (CMAKE_BUILD_TYPE MATCHES "[Rr][Ee][Ll][Ee][Aa][Ss][Ee]") set(GINKGO_THIRD_PARTY_BUILD_TYPE "Release") diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index d224a7e0f90..e7c2bf7ce45 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -113,9 +113,8 @@ target_link_libraries(ginkgo set(GKO_RPATH_ADDITIONS "") if(GINKGO_HAVE_PAPI_SDE) - target_link_libraries(ginkgo PUBLIC PAPI::PAPI) - list(GET PAPI_LIBRARIES 0 PAPI_FIRST_LIB) - get_filename_component(GKO_PAPI_LIBDIR "${PAPI_FIRST_LIB}" DIRECTORY) + target_link_libraries(ginkgo PUBLIC PAPI::PAPI_SDE) + get_filename_component(GKO_PAPI_LIBDIR "${PAPI_SDE_LIBRARIES}" DIRECTORY) list(APPEND GKO_RPATH_ADDITIONS "${GKO_PAPI_LIBDIR}") endif() diff --git a/core/test/log/CMakeLists.txt b/core/test/log/CMakeLists.txt index 964572bd48c..8efd7fafc46 100644 --- a/core/test/log/CMakeLists.txt +++ b/core/test/log/CMakeLists.txt @@ -1,7 +1,7 @@ ginkgo_create_test(convergence) ginkgo_create_test(logger) if (GINKGO_HAVE_PAPI_SDE) - ginkgo_create_test(papi PAPI::PAPI) + ginkgo_create_test(papi ADDITIONAL_LIBRARIES PAPI::PAPI) endif() ginkgo_create_test(performance_hint) ginkgo_create_test(profiler_hook) diff --git a/core/test/log/papi.cpp b/core/test/log/papi.cpp index 8ab0bb6421d..d089902c30c 100644 --- a/core/test/log/papi.cpp +++ b/core/test/log/papi.cpp @@ -71,7 +71,11 @@ class Papi : public ::testing::Test { } } - void TearDown() { eventset = PAPI_NULL; } + void TearDown() { + logger = nullptr; + PAPI_destroy_eventset(&eventset); + PAPI_shutdown(); + } template const std::string init(const gko::log::Logger::mask_type& event, diff --git a/include/ginkgo/core/log/papi.hpp b/include/ginkgo/core/log/papi.hpp index 5d07879d116..9645e775b3d 100644 --- a/include/ginkgo/core/log/papi.hpp +++ b/include/ginkgo/core/log/papi.hpp @@ -46,16 +46,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include +#include #include #include -#include "third_party/papi_sde/papi_sde_interface.h" - - namespace gko { namespace log { @@ -213,7 +210,10 @@ class Papi : public Logger { create(std::shared_ptr, const Logger::mask_type& enabled_events = Logger::all_events_mask) { - return std::shared_ptr(new Papi(enabled_events)); + return std::shared_ptr(new Papi(enabled_events), [](auto logger){ + papi_sde_shutdown(logger->get_handle()); + delete logger; + }); } /** @@ -224,7 +224,11 @@ class Papi : public Logger { static std::shared_ptr create( const Logger::mask_type& enabled_events = Logger::all_events_mask) { - return std::shared_ptr(new Papi(enabled_events)); + return std::shared_ptr(new Papi(enabled_events), [](auto logger){ + papi_sde_shutdown(logger->get_handle()); + delete logger; + } +); } /** @@ -235,6 +239,13 @@ class Papi : public Logger { */ const std::string get_handle_name() const { return name; } + /** + * Returns the corresponding papi_handle_t for this logger + * + * @return the corresponding papi_handle_t for this logger + */ + const papi_handle_t get_handle() const { return papi_handle; } + protected: [[deprecated("use single-parameter constructor")]] explicit Papi( std::shared_ptr exec, @@ -265,12 +276,10 @@ class Papi : public Logger { ~papi_queue() { - if (PAPI_is_initialized()) { - for (auto e : data) { - std::ostringstream oss; - oss << counter_name << "::" << e.first; - papi_sde_unregister_counter(*handle, oss.str().c_str()); - } + for (auto e : data) { + std::ostringstream oss; + oss << counter_name << "::" << e.first; + papi_sde_unregister_counter(*handle, oss.str().c_str()); } data.clear(); } diff --git a/reference/test/log/CMakeLists.txt b/reference/test/log/CMakeLists.txt index 2d9e8f188cb..44faca51f90 100644 --- a/reference/test/log/CMakeLists.txt +++ b/reference/test/log/CMakeLists.txt @@ -1,4 +1,4 @@ ginkgo_create_test(convergence) if (GINKGO_HAVE_PAPI_SDE) - ginkgo_create_test(papi PAPI::PAPI) + ginkgo_create_test(papi ADDITIONAL_LIBRARIES PAPI::PAPI) endif() diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index c714a51c187..be35785d730 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -7,6 +7,10 @@ if(GINKGO_BUILD_HWLOC AND (NOT HWLOC_FOUND)) add_subdirectory(hwloc) endif() +if(GINKGO_BUILD_PAPI_SDE AND (NOT PAPI_FOUND)) + add_subdirectory(papi_sde) +endif() + if(GINKGO_DEVEL_TOOLS) set(GCF_IGNORE_LIST "third_party" CACHE STRING "Ignore directories for GCF") add_subdirectory(git-cmake-format) diff --git a/third_party/hwloc/CMakeLists.txt b/third_party/hwloc/CMakeLists.txt index 9cbbb46482e..f86d6bf0e5b 100644 --- a/third_party/hwloc/CMakeLists.txt +++ b/third_party/hwloc/CMakeLists.txt @@ -2,6 +2,7 @@ message(STATUS "Configuring and building HWLOC") set(TPL_HWLOC_PATH "${PROJECT_BINARY_DIR}/third_party/hwloc") ginkgo_load_and_configure_package(hwloc_external "https://download.open-mpi.org/release/hwloc/v2.4/hwloc-2.4.1.tar.gz" "SHA1=b94950e8958e1125ca75ecac0bc0259ee3d108c4" + "" "${TPL_HWLOC_PATH}/src/configure" "--disable-nvml" "--disable-cuda" "--disable-rsmi" ) diff --git a/third_party/papi_sde/CMakeLists.txt b/third_party/papi_sde/CMakeLists.txt new file mode 100644 index 00000000000..f9866d1b553 --- /dev/null +++ b/third_party/papi_sde/CMakeLists.txt @@ -0,0 +1,37 @@ +message(STATUS "Configuring and building PAPI-SDE") +set(TPL_PAPI_PATH "${PROJECT_BINARY_DIR}/third_party/papi_sde/src") +ginkgo_load_and_configure_package(papi_external + 
"https://bitbucket.org/terry_cojean/papi/get/77cdd0ba8db98d86c1459dd5f55013aba242d5d5.tar.gz" + "SHA1=540c18a14eeafb83cd60cbbf0a96706111dbff3b" + "${TPL_PAPI_PATH}/src" + "./configure" "--prefix=${TPL_PAPI_PATH}/install" + "--with-components=sde" "--with-libsde=yes" "--with-tests=no" + "--with-static-lib=no" "--with-shared-lib=yes" + ) + +add_library(PAPI SHARED IMPORTED GLOBAL) +add_library(PAPI::PAPI ALIAS PAPI) +add_dependencies(PAPI papi_external) +# NOTE: if changing this (e.g. to `.a`), please update the special case in +# `cmake/information_helpers.cmake` +set(PAPI_LIBRARIES "${TPL_PAPI_PATH}/install/lib/libpapi.so" + CACHE FILEPATH "The path to PAPI libraries" FORCE) +set(PAPI_INCLUDE_DIRS "${TPL_PAPI_PATH}/install/include" CACHE PATH + "The directory containing the PAPI header, papi.h" FORCE) +set_target_properties(PAPI PROPERTIES IMPORTED_LOCATION "${PAPI_LIBRARIES}") +set_target_properties(PAPI PROPERTIES INTERFACE_LINK_LIBRARIES "${PAPI_LIBRARIES}") +set_target_properties(PAPI PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${PAPI_INCLUDE_DIRS}") + + +add_library(PAPI_SDE SHARED IMPORTED GLOBAL) +add_library(PAPI::PAPI_SDE ALIAS PAPI_SDE) +add_dependencies(PAPI_SDE papi_external) +# NOTE: if changing this (e.g. to `.a`), please update the special case in +# `cmake/information_helpers.cmake` +set(PAPI_SDE_LIBRARIES "${TPL_PAPI_PATH}/install/lib/libsde.so" + CACHE FILEPATH "The path to PAPI SDE libraries" FORCE) +set(PAPI_SDE_INCLUDE_DIRS "${TPL_PAPI_PATH}/install/include" CACHE PATH + "The directory containing the PAPI SDE header, sde_lib.h" FORCE) +set_target_properties(PAPI_SDE PROPERTIES IMPORTED_LOCATION "${PAPI_SDE_LIBRARIES}") +set_target_properties(PAPI_SDE PROPERTIES INTERFACE_LINK_LIBRARIES "${PAPI_SDE_LIBRARIES}") +set_target_properties(PAPI_SDE PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${PAPI_SDE_INCLUDE_DIRS}") diff --git a/third_party/papi_sde/papi_sde_interface.h b/third_party/papi_sde/papi_sde_interface.h deleted file mode 100644 index 6a28d0089a3..00000000000 --- a/third_party/papi_sde/papi_sde_interface.h +++ /dev/null @@ -1,113 +0,0 @@ -#ifndef PAPI_SDE_INTERFACE_H -#define PAPI_SDE_INTERFACE_H - -#include -#include - -#define PAPI_SDE_RO 0x00 -#define PAPI_SDE_RW 0x01 -#define PAPI_SDE_DELTA 0x00 -#define PAPI_SDE_INSTANT 0x10 - -#define PAPI_SDE_long_long 0x0 -#define PAPI_SDE_int 0x1 -#define PAPI_SDE_double 0x2 -#define PAPI_SDE_float 0x3 - -#define PAPI_SDE_SUM 0x0 -#define PAPI_SDE_MAX 0x1 -#define PAPI_SDE_MIN 0x2 - - -#define GET_FLOAT_SDE(x) *((float *)&x) -#define GET_DOUBLE_SDE(x) *((double *)&x) -/* - * GET_SDE_RECORDER_ADDRESS() USAGE EXAMPLE: - * If SDE recorder logs values of type 'double': - * double *ptr = GET_SDE_RECORDER_ADDRESS(papi_event_value[6], double); - * for (j=0; j Date: Thu, 6 Apr 2023 13:22:41 +0200 Subject: [PATCH 174/583] Adapt hwloc to the new scheme --- third_party/hwloc/CMakeLists.txt | 20 ++++++++------------ third_party/papi_sde/CMakeLists.txt | 4 ++-- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/third_party/hwloc/CMakeLists.txt b/third_party/hwloc/CMakeLists.txt index f86d6bf0e5b..5534c07f4f2 100644 --- a/third_party/hwloc/CMakeLists.txt +++ b/third_party/hwloc/CMakeLists.txt @@ -1,24 +1,20 @@ message(STATUS "Configuring and building HWLOC") -set(TPL_HWLOC_PATH "${PROJECT_BINARY_DIR}/third_party/hwloc") +set(TPL_HWLOC_PATH "${PROJECT_BINARY_DIR}/third_party/hwloc/src") ginkgo_load_and_configure_package(hwloc_external "https://download.open-mpi.org/release/hwloc/v2.4/hwloc-2.4.1.tar.gz" 
"SHA1=b94950e8958e1125ca75ecac0bc0259ee3d108c4" - "" - "${TPL_HWLOC_PATH}/src/configure" "--disable-nvml" "--disable-cuda" "--disable-rsmi" + "${TPL_HWLOC_PATH}" + "./configure" "--prefix=${TPL_HWLOC_PATH}/install" + "--disable-nvml" "--disable-cuda" "--disable-rsmi" ) add_library(hwloc SHARED IMPORTED GLOBAL) add_dependencies(hwloc hwloc_external) -file(MAKE_DIRECTORY ${TPL_HWLOC_PATH}/lib/) -file(GLOB HWLOC_LIBS "${TPL_HWLOC_PATH}/build/hwloc/.libs/libhwloc.so*") -configure_file("${TPL_HWLOC_PATH}/build/include/hwloc/autogen/config.h" "${TPL_HWLOC_PATH}/src/include/hwloc/autogen/config.h" COPYONLY) -foreach(lib ${HWLOC_LIBS}) - get_filename_component(lib_name ${lib} NAME) - configure_file("${lib}" "${TPL_HWLOC_PATH}/lib/${lib_name}" COPYONLY) -endforeach() # NOTE: if changing this (e.g. to `.a`), please update the special case in # `cmake/information_helpers.cmake` -set(HWLOC_LIBRARIES "${TPL_HWLOC_PATH}/lib/libhwloc.so" CACHE FILEPATH "The path to HWLOC library libhwloc.so" FORCE) -set(HWLOC_INCLUDE_DIRS "${TPL_HWLOC_PATH}/src/include" CACHE PATH "The directory containing the hwloc header, hwloc.h" FORCE) +set(HWLOC_LIBRARIES "${TPL_HWLOC_PATH}/install/lib/libhwloc.so" + CACHE FILEPATH "The path to HWLOC library libhwloc.so" FORCE) +set(HWLOC_INCLUDE_DIRS "${TPL_HWLOC_PATH}/install/include" + CACHE PATH "The directory containing the hwloc header, hwloc.h" FORCE) set_target_properties(hwloc PROPERTIES IMPORTED_LOCATION ${HWLOC_LIBRARIES}) set_target_properties(hwloc PROPERTIES INTERFACE_LINK_LIBRARIES ${HWLOC_LIBRARIES}) set_target_properties(hwloc PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${HWLOC_INCLUDE_DIRS}") diff --git a/third_party/papi_sde/CMakeLists.txt b/third_party/papi_sde/CMakeLists.txt index f9866d1b553..4b58e368662 100644 --- a/third_party/papi_sde/CMakeLists.txt +++ b/third_party/papi_sde/CMakeLists.txt @@ -5,8 +5,8 @@ ginkgo_load_and_configure_package(papi_external "SHA1=540c18a14eeafb83cd60cbbf0a96706111dbff3b" "${TPL_PAPI_PATH}/src" "./configure" "--prefix=${TPL_PAPI_PATH}/install" - "--with-components=sde" "--with-libsde=yes" "--with-tests=no" - "--with-static-lib=no" "--with-shared-lib=yes" + "--with-components=sde" "--with-libsde=yes" "--with-tests=no" + "--with-static-lib=no" "--with-shared-lib=yes" ) add_library(PAPI SHARED IMPORTED GLOBAL) From 58c40d6d409b430a11e54ee8cbee7ad6a9d839b8 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Thu, 6 Apr 2023 13:25:03 +0200 Subject: [PATCH 175/583] Enable PAPI_SDE for a pipeline. --- .gitlab-ci.yml | 2 ++ .gitlab/scripts.yml | 2 ++ .gitlab/variables.yml | 1 + 3 files changed, 5 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ae7fa86fd38..1cd8c0335f8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -113,6 +113,7 @@ build/cuda101/nompi/clang/cuda_wo_omp/release/shared: CUDA_ARCH: 35 # Job with example runs. 
+# Also explicitly test PAPI SDE build/cuda101/openmpi/gcc/all/debug/shared: extends: - .build_template @@ -126,6 +127,7 @@ build/cuda101/openmpi/gcc/all/debug/shared: MPI_AS_ROOT: "ON" BUILD_HIP: "ON" BUILD_TYPE: "Debug" + BUILD_PAPI_SDE: "ON" RUN_EXAMPLES: "ON" CUDA_ARCH: 35 diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index cf6baad6fab..b007caff35f 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -43,6 +43,7 @@ -DGINKGO_BUILD_HIP=${BUILD_HIP} -DGINKGO_BUILD_MPI=${BUILD_MPI} ${MPI_STR} -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} + -DGINKGO_BUILD_PAPI_SDE=${BUILD_PAPI_SDE} -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON -DGINKGO_FAST_TESTS=${FAST_TESTS} -DGINKGO_TEST_NONDEFAULT_STREAM=${NONDEFAULT_STREAM} @@ -87,6 +88,7 @@ -DGINKGO_BUILD_HIP=${BUILD_HIP} -DGINKGO_BUILD_MPI=${BUILD_MPI} ${MPI_STR} -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} + -DGINKGO_BUILD_PAPI_SDE=${BUILD_PAPI_SDE} -DGINKGO_BUILD_TESTS=ON -DGINKGO_BUILD_EXAMPLES=ON -DGINKGO_FAST_TESTS=${FAST_TESTS} -DGINKGO_MIXED_PRECISION=${MIXED_PRECISION} diff --git a/.gitlab/variables.yml b/.gitlab/variables.yml index 183bdef9e4e..2316b5abc71 100644 --- a/.gitlab/variables.yml +++ b/.gitlab/variables.yml @@ -12,6 +12,7 @@ BUILD_CUDA: "OFF" BUILD_HIP: "OFF" BUILD_HWLOC: "ON" + BUILD_PAPI_SDE: "OFF" BUILD_MPI: "OFF" GKO_COMPILER_FLAGS: "" MPI_AS_ROOT: "OFF" From be0f5e2da52bdb7b405724a7fce8614503cf83cf Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Fri, 14 Jul 2023 15:49:46 +0200 Subject: [PATCH 176/583] Review updates --- CMakeLists.txt | 15 ++++++--------- cmake/GinkgoConfig.cmake.in | 2 +- third_party/CMakeLists.txt | 2 +- third_party/papi_sde/CMakeLists.txt | 6 +++--- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6dc01ed27ef..1d18b18d00a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,7 +79,7 @@ if(MSVC OR WIN32 OR CYGWIN OR APPLE) else() option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is ON. If a system HWLOC is not found, then we try to build it ourselves. Switch this OFF to disable HWLOC." ON) endif() -option(GINKGO_BUILD_PAPI_SDE "Build Ginkgo with PAPI SDE. Default is OFF." OFF) +option(GINKGO_BUILD_PAPI_SDE "Build Ginkgo with PAPI SDE. Default is ON. Requires a system package." ON) option(GINKGO_DPCPP_SINGLE_MODE "Do not compile double kernels for the DPC++ backend." OFF) option(GINKGO_INSTALL_RPATH "Set the RPATH when installing its libraries." ON) option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) to the installation RPATH." 
ON) @@ -232,12 +232,6 @@ else() set(GINKGO_HAVE_HWLOC 0) message(STATUS "HWLOC is being forcibly switched off") endif() -if(GINKGO_BUILD_PAPI_SDE) - set(GINKGO_HAVE_PAPI_SDE 1) -else() - set(GINKGO_HAVE_PAPI_SDE 0) - message(STATUS "PAPI SDE is being forcibly switched off") -endif() set(GINKGO_HAVE_GPU_AWARE_MPI OFF) set(GINKGO_FORCE_SPMV_BLOCKING_COMM OFF) @@ -278,9 +272,12 @@ endif() if(GINKGO_BUILD_HWLOC) find_package(HWLOC 2.1) # No need for QUIET as we ship FindHWLOC endif() +set(GINKGO_HAVE_PAPI_SDE 0) if(GINKGO_BUILD_PAPI_SDE) - # No need for QUIET as we ship FindPAPI - find_package(PAPI OPTIONAL_COMPONENTS sde) + find_package(PAPI 7.0.1.0 COMPONENTS sde) + if (PAPI_FOUND AND PAPI_SDE_FOUND) + set(GINKGO_HAVE_PAPI_SDE 1) + endif() endif() add_subdirectory(third_party) # Third-party tools and libraries diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index f4eace2fdbc..0d7ce5455f1 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -143,7 +143,7 @@ set(VTune_PATH "@VTune_PATH@") # so `third_party` libraries are currently unneeded. if(GINKGO_HAVE_PAPI_SDE) - find_package(PAPI REQUIRED OPTIONAL_COMPONENTS sde) + find_package(PAPI REQUIRED COMPONENTS sde) endif() if(GINKGO_HAVE_HWLOC) diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index be35785d730..062f520b8e8 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -7,7 +7,7 @@ if(GINKGO_BUILD_HWLOC AND (NOT HWLOC_FOUND)) add_subdirectory(hwloc) endif() -if(GINKGO_BUILD_PAPI_SDE AND (NOT PAPI_FOUND)) +if(GINKGO_WITH_PAPI_SDE AND (NOT PAPI_FOUND)) add_subdirectory(papi_sde) endif() diff --git a/third_party/papi_sde/CMakeLists.txt b/third_party/papi_sde/CMakeLists.txt index 4b58e368662..5b300d973a5 100644 --- a/third_party/papi_sde/CMakeLists.txt +++ b/third_party/papi_sde/CMakeLists.txt @@ -1,9 +1,9 @@ message(STATUS "Configuring and building PAPI-SDE") set(TPL_PAPI_PATH "${PROJECT_BINARY_DIR}/third_party/papi_sde/src") ginkgo_load_and_configure_package(papi_external - "https://bitbucket.org/terry_cojean/papi/get/77cdd0ba8db98d86c1459dd5f55013aba242d5d5.tar.gz" - "SHA1=540c18a14eeafb83cd60cbbf0a96706111dbff3b" - "${TPL_PAPI_PATH}/src" + "https://github.com/icl-utk-edu/papi/archive/d2dd17a07a3c175fbb26ce5528671e3a7e00b80f.tar.gz" + "SHA1=55019037c47aff216ff831b4191e7147f6932464" + "${TPL_PAPI_PATH}/build" "./configure" "--prefix=${TPL_PAPI_PATH}/install" "--with-components=sde" "--with-libsde=yes" "--with-tests=no" "--with-static-lib=no" "--with-shared-lib=yes" From 011f8c85155477eea348db8f38a65261e98ea654 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 8 Aug 2023 11:09:05 +0200 Subject: [PATCH 177/583] Try to not bundle SDE but rely on external only. 
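With the bundled copy removed, PAPI and its sde component have to come from the system; only the build system changes, the logger interface stays the same. For orientation, a minimal application-side sketch of using the SDE-backed logger. This is an illustration, not part of the patch: the value type, the event mask, and the GKO_HAVE_PAPI_SDE guard macro from the installed ginkgo/config.hpp are assumptions of the example, not something this commit prescribes.

    #include <ginkgo/ginkgo.hpp>

    #if GKO_HAVE_PAPI_SDE
    #include <ginkgo/core/log/papi.hpp>
    #endif

    int main()
    {
    #if GKO_HAVE_PAPI_SDE
        auto exec = gko::ReferenceExecutor::create();
        // register Ginkgo's software-defined events with PAPI and record them
        auto logger = gko::log::Papi<double>::create(
            gko::log::Logger::all_events_mask);
        exec->add_logger(logger);
        // ... run solvers here; external PAPI tools can read the counters
        // exposed under the name returned by logger->get_handle_name()
        exec->remove_logger(logger.get());
    #endif
    }
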
--- third_party/CMakeLists.txt | 4 ---- third_party/papi_sde/CMakeLists.txt | 37 ----------------------------- 2 files changed, 41 deletions(-) delete mode 100644 third_party/papi_sde/CMakeLists.txt diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index 062f520b8e8..c714a51c187 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -7,10 +7,6 @@ if(GINKGO_BUILD_HWLOC AND (NOT HWLOC_FOUND)) add_subdirectory(hwloc) endif() -if(GINKGO_WITH_PAPI_SDE AND (NOT PAPI_FOUND)) - add_subdirectory(papi_sde) -endif() - if(GINKGO_DEVEL_TOOLS) set(GCF_IGNORE_LIST "third_party" CACHE STRING "Ignore directories for GCF") add_subdirectory(git-cmake-format) diff --git a/third_party/papi_sde/CMakeLists.txt b/third_party/papi_sde/CMakeLists.txt deleted file mode 100644 index 5b300d973a5..00000000000 --- a/third_party/papi_sde/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -message(STATUS "Configuring and building PAPI-SDE") -set(TPL_PAPI_PATH "${PROJECT_BINARY_DIR}/third_party/papi_sde/src") -ginkgo_load_and_configure_package(papi_external - "https://github.com/icl-utk-edu/papi/archive/d2dd17a07a3c175fbb26ce5528671e3a7e00b80f.tar.gz" - "SHA1=55019037c47aff216ff831b4191e7147f6932464" - "${TPL_PAPI_PATH}/build" - "./configure" "--prefix=${TPL_PAPI_PATH}/install" - "--with-components=sde" "--with-libsde=yes" "--with-tests=no" - "--with-static-lib=no" "--with-shared-lib=yes" - ) - -add_library(PAPI SHARED IMPORTED GLOBAL) -add_library(PAPI::PAPI ALIAS PAPI) -add_dependencies(PAPI papi_external) -# NOTE: if changing this (e.g. to `.a`), please update the special case in -# `cmake/information_helpers.cmake` -set(PAPI_LIBRARIES "${TPL_PAPI_PATH}/install/lib/libpapi.so" - CACHE FILEPATH "The path to PAPI libraries" FORCE) -set(PAPI_INCLUDE_DIRS "${TPL_PAPI_PATH}/install/include" CACHE PATH - "The directory containing the PAPI header, papi.h" FORCE) -set_target_properties(PAPI PROPERTIES IMPORTED_LOCATION "${PAPI_LIBRARIES}") -set_target_properties(PAPI PROPERTIES INTERFACE_LINK_LIBRARIES "${PAPI_LIBRARIES}") -set_target_properties(PAPI PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${PAPI_INCLUDE_DIRS}") - - -add_library(PAPI_SDE SHARED IMPORTED GLOBAL) -add_library(PAPI::PAPI_SDE ALIAS PAPI_SDE) -add_dependencies(PAPI_SDE papi_external) -# NOTE: if changing this (e.g. 
to `.a`), please update the special case in -# `cmake/information_helpers.cmake` -set(PAPI_SDE_LIBRARIES "${TPL_PAPI_PATH}/install/lib/libsde.so" - CACHE FILEPATH "The path to PAPI SDE libraries" FORCE) -set(PAPI_SDE_INCLUDE_DIRS "${TPL_PAPI_PATH}/install/include" CACHE PATH - "The directory containing the PAPI SDE header, sde_lib.h" FORCE) -set_target_properties(PAPI_SDE PROPERTIES IMPORTED_LOCATION "${PAPI_SDE_LIBRARIES}") -set_target_properties(PAPI_SDE PROPERTIES INTERFACE_LINK_LIBRARIES "${PAPI_SDE_LIBRARIES}") -set_target_properties(PAPI_SDE PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${PAPI_SDE_INCLUDE_DIRS}") From fad4621c5af3bb8cbc0a6a915477265008257471 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Tue, 8 Aug 2023 09:20:57 +0000 Subject: [PATCH 178/583] Format files Co-authored-by: Terry Cojean --- core/test/log/papi.cpp | 3 ++- include/ginkgo/core/log/papi.hpp | 9 +++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/core/test/log/papi.cpp b/core/test/log/papi.cpp index d089902c30c..2ed266449f6 100644 --- a/core/test/log/papi.cpp +++ b/core/test/log/papi.cpp @@ -71,7 +71,8 @@ class Papi : public ::testing::Test { } } - void TearDown() { + void TearDown() + { logger = nullptr; PAPI_destroy_eventset(&eventset); PAPI_shutdown(); diff --git a/include/ginkgo/core/log/papi.hpp b/include/ginkgo/core/log/papi.hpp index 9645e775b3d..bf22f7c876f 100644 --- a/include/ginkgo/core/log/papi.hpp +++ b/include/ginkgo/core/log/papi.hpp @@ -44,8 +44,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include - - #include @@ -210,7 +208,7 @@ class Papi : public Logger { create(std::shared_ptr, const Logger::mask_type& enabled_events = Logger::all_events_mask) { - return std::shared_ptr(new Papi(enabled_events), [](auto logger){ + return std::shared_ptr(new Papi(enabled_events), [](auto logger) { papi_sde_shutdown(logger->get_handle()); delete logger; }); @@ -224,11 +222,10 @@ class Papi : public Logger { static std::shared_ptr create( const Logger::mask_type& enabled_events = Logger::all_events_mask) { - return std::shared_ptr(new Papi(enabled_events), [](auto logger){ + return std::shared_ptr(new Papi(enabled_events), [](auto logger) { papi_sde_shutdown(logger->get_handle()); delete logger; - } -); + }); } /** From 4320ad15d4430a60d434613524c3559336fc6688 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 8 Aug 2023 17:57:40 +0200 Subject: [PATCH 179/583] Improve PAPI finder: store binaries, include SDE --- cmake/Modules/FindPAPI.cmake | 77 ++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/cmake/Modules/FindPAPI.cmake b/cmake/Modules/FindPAPI.cmake index 95f26a24684..04962970e35 100644 --- a/cmake/Modules/FindPAPI.cmake +++ b/cmake/Modules/FindPAPI.cmake @@ -57,6 +57,7 @@ if(NOT PAPI_LIBRARY) select_library_configurations(PAPI) endif() +set(WORK_DIR "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/FindPAPI") if(PAPI_INCLUDE_DIR) if(EXISTS "${PAPI_INCLUDE_DIR}/papi.h") file(STRINGS "${PAPI_INCLUDE_DIR}/papi.h" papi_version_str REGEX "^#define[\t ]+PAPI_VERSION[\t ]+.*") @@ -70,7 +71,9 @@ if(PAPI_INCLUDE_DIR) # find the components enable_language(C) foreach(component IN LISTS PAPI_FIND_COMPONENTS) - file(WRITE "${PROJECT_BINARY_DIR}/papi_${component}_detect.c" + set(SRC_FILE "${WORK_DIR}/papi_${component}_detect.c") + set(BIN_FILE "${WORK_DIR}/papi_${component}_detect.bin") + file(WRITE "${SRC_FILE}" " #include int main() { @@ -78,17 +81,18 @@ if(PAPI_INCLUDE_DIR) retval = 
PAPI_library_init(PAPI_VER_CURRENT); if (retval != PAPI_VER_CURRENT && retval > 0) return -1; - if (PAPI_get_component_index(\"${component}\") < 0) + if (PAPI_get_component_index(\"${component}\") == PAPI_ENOCMP) return 0; return 1; }" ) try_run(PAPI_${component}_FOUND gko_result_unused - "${PROJECT_BINARY_DIR}" - "${PROJECT_BINARY_DIR}/papi_${component}_detect.c" + "${WORK_DIR}" + "${SRC_FILE}" CMAKE_FLAGS -DINCLUDE_DIRECTORIES=${PAPI_INCLUDE_DIR} LINK_LIBRARIES ${PAPI_LIBRARY} + COPY_FILE ${BIN_FILE} ) if (NOT PAPI_${component}_FOUND EQUAL 1) @@ -105,6 +109,33 @@ find_package_handle_standard_args(PAPI VERSION_VAR PAPI_VERSION_STRING HANDLE_COMPONENTS) +if(PAPI_sde_FOUND) + # PAPI SDE is another library and header, let's try to find them + find_path(PAPI_SDE_INCLUDE_DIR NAMES sde_lib.h) + mark_as_advanced(PAPI_SDE_INCLUDE_DIR) + + if(NOT PAPI_SDE_LIBRARY) + find_library(PAPI_SDE_LIBRARY_RELEASE NAMES + sde + ) + mark_as_advanced(PAPI_SDE_LIBRARY_RELEASE) + + find_library(PAPI_SDE_LIBRARY_DEBUG NAMES + sded + sde-d + ) + mark_as_advanced(PAPI_SDE_LIBRARY_DEBUG) + + include(SelectLibraryConfigurations) + select_library_configurations(PAPI_SDE) + endif() + + # FIXME: with CMake>=3.17, use NAME_MISMATCHED to get rid of the warning + find_package_handle_standard_args(PAPI_SDE + REQUIRED_VARS PAPI_SDE_LIBRARY PAPI_SDE_INCLUDE_DIR + VERSION_VAR PAPI_VERSION_STRING) +endif() + if(PAPI_FOUND) set(PAPI_LIBRARIES ${PAPI_LIBRARY}) set(PAPI_INCLUDE_DIRS ${PAPI_INCLUDE_DIR}) @@ -142,3 +173,41 @@ if(PAPI_FOUND) endif() endif() endif() + +if (PAPI_SDE_FOUND AND NOT TARGET PAPI::PAPI_SDE) + set(PAPI_SDE_LIBRARIES ${PAPI_SDE_LIBRARY}) + set(PAPI_SDE_INCLUDE_DIRS ${PAPI_SDE_INCLUDE_DIR}) + unset(PAPI_SDE_LIBRARY) + unset(PAPI_SDE_INCLUDE_DIR) + + if(NOT TARGET PAPI::PAPI_SDE) + add_library(PAPI::PAPI_SDE UNKNOWN IMPORTED) + set_target_properties(PAPI::PAPI_SDE PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${PAPI_SDE_INCLUDE_DIRS}") + + if(EXISTS "${PAPI_SDE_LIBRARIES}") + set_target_properties(PAPI::PAPI_SDE PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + INTERFACE_LINK_LIBRARIES "${PAPI_SDE_LIBRARIES}" + IMPORTED_LOCATION "${PAPI_SDE_LIBRARIES}") + endif() + if(PAPI_SDE_LIBRARY_RELEASE) + set_property(TARGET PAPI::PAPI_SDE APPEND PROPERTY + IMPORTED_CONFIGURATIONS RELEASE) + set_target_properties(PAPI::PAPI_SDE PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + INTERFACE_LINK_LIBRARIES_RELEASE "${PAPI_SDE_LIBRARY_RELEASE}" + IMPORTED_LOCATION_RELEASE "${PAPI_SDE_LIBRARY_RELEASE}") + unset(PAPI_SDE_LIBRARY_RELEASE) + endif() + if(PAPI_SDE_LIBRARY_DEBUG) + set_property(TARGET PAPI::PAPI_SDE APPEND PROPERTY + IMPORTED_CONFIGURATIONS DEBUG) + set_target_properties(PAPI::PAPI_SDE PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + INTERFACE_LINK_LIBRARIES_DEBUG "${PAPI_SDE_LIBRARY_DEBUG}" + IMPORTED_LOCATION_DEBUG "${PAPI_SDE_LIBRARY_DEBUG}") + unset(PAPI_SDE_LIBRARY_DEBUG) + endif() + endif() +endif() From 03577c3a6fe423ab8929075699982e80073861c4 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 8 Aug 2023 17:58:02 +0200 Subject: [PATCH 180/583] Also drop bundled hwloc. Rely on system only. 
--- CMakeLists.txt | 18 ++++++++++-------- third_party/CMakeLists.txt | 4 ---- third_party/hwloc/CMakeLists.txt | 20 -------------------- 3 files changed, 10 insertions(+), 32 deletions(-) delete mode 100644 third_party/hwloc/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d18b18d00a..94e0c6318ae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,7 +77,7 @@ option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON) if(MSVC OR WIN32 OR CYGWIN OR APPLE) option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is OFF. Ginkgo does not support HWLOC on Windows/MacOS" OFF) else() - option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is ON. If a system HWLOC is not found, then we try to build it ourselves. Switch this OFF to disable HWLOC." ON) + option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is ON. A system HWLOC is required, otherwise HWLOC support will be disabled." ON) endif() option(GINKGO_BUILD_PAPI_SDE "Build Ginkgo with PAPI SDE. Default is ON. Requires a system package." ON) option(GINKGO_DPCPP_SINGLE_MODE "Do not compile double kernels for the DPC++ backend." OFF) @@ -226,12 +226,6 @@ if(GINKGO_BUILD_HWLOC AND (MSVC OR WIN32 OR CYGWIN OR APPLE)) set(GINKGO_BUILD_HWLOC OFF CACHE BOOL "Build Ginkgo with HWLOC. Default is OFF. Ginkgo does not support HWLOC on Windows/MacOS" FORCE) message(WARNING "Ginkgo does not support HWLOC on Windows/MacOS, switch GINKGO_BUILD_HWLOC to OFF") endif() -if(GINKGO_BUILD_HWLOC) - set(GINKGO_HAVE_HWLOC 1) -else() - set(GINKGO_HAVE_HWLOC 0) - message(STATUS "HWLOC is being forcibly switched off") -endif() set(GINKGO_HAVE_GPU_AWARE_MPI OFF) set(GINKGO_FORCE_SPMV_BLOCKING_COMM OFF) @@ -269,8 +263,14 @@ if(GINKGO_BUILD_BENCHMARKS) find_package(gflags 2.2.2 QUIET) find_package(RapidJSON 1.1.0 QUIET) endif() + +# System provided, third party libraries (not bundled!) 
+set(GINKGO_HAVE_HWLOC 0) if(GINKGO_BUILD_HWLOC) - find_package(HWLOC 2.1) # No need for QUIET as we ship FindHWLOC + find_package(HWLOC 2.1) + if (HWLOC_FOUND) + set(GINKGO_HAVE_HWLOC 1) + endif() endif() set(GINKGO_HAVE_PAPI_SDE 0) if(GINKGO_BUILD_PAPI_SDE) @@ -279,6 +279,8 @@ if(GINKGO_BUILD_PAPI_SDE) set(GINKGO_HAVE_PAPI_SDE 1) endif() endif() + +# Bundled third party libraries add_subdirectory(third_party) # Third-party tools and libraries if(MSVC) diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index c714a51c187..a54d4d506ee 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -3,10 +3,6 @@ if(GINKGO_BUILD_TESTS AND (NOT GTest_FOUND)) add_subdirectory(gtest) endif() -if(GINKGO_BUILD_HWLOC AND (NOT HWLOC_FOUND)) - add_subdirectory(hwloc) -endif() - if(GINKGO_DEVEL_TOOLS) set(GCF_IGNORE_LIST "third_party" CACHE STRING "Ignore directories for GCF") add_subdirectory(git-cmake-format) diff --git a/third_party/hwloc/CMakeLists.txt b/third_party/hwloc/CMakeLists.txt deleted file mode 100644 index 5534c07f4f2..00000000000 --- a/third_party/hwloc/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -message(STATUS "Configuring and building HWLOC") -set(TPL_HWLOC_PATH "${PROJECT_BINARY_DIR}/third_party/hwloc/src") -ginkgo_load_and_configure_package(hwloc_external "https://download.open-mpi.org/release/hwloc/v2.4/hwloc-2.4.1.tar.gz" - "SHA1=b94950e8958e1125ca75ecac0bc0259ee3d108c4" - "${TPL_HWLOC_PATH}" - "./configure" "--prefix=${TPL_HWLOC_PATH}/install" - "--disable-nvml" "--disable-cuda" "--disable-rsmi" - ) - -add_library(hwloc SHARED IMPORTED GLOBAL) -add_dependencies(hwloc hwloc_external) -# NOTE: if changing this (e.g. to `.a`), please update the special case in -# `cmake/information_helpers.cmake` -set(HWLOC_LIBRARIES "${TPL_HWLOC_PATH}/install/lib/libhwloc.so" - CACHE FILEPATH "The path to HWLOC library libhwloc.so" FORCE) -set(HWLOC_INCLUDE_DIRS "${TPL_HWLOC_PATH}/install/include" - CACHE PATH "The directory containing the hwloc header, hwloc.h" FORCE) -set_target_properties(hwloc PROPERTIES IMPORTED_LOCATION ${HWLOC_LIBRARIES}) -set_target_properties(hwloc PROPERTIES INTERFACE_LINK_LIBRARIES ${HWLOC_LIBRARIES}) -set_target_properties(hwloc PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${HWLOC_INCLUDE_DIRS}") From 58522a4e86808c432790c07677aeff8f9015c543 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 8 Aug 2023 18:19:25 +0200 Subject: [PATCH 181/583] Remove non CMake external package management --- CMakeLists.txt | 1 - cmake/DownloadNonCMakeCMakeLists.txt.in | 27 ----------- cmake/package_helpers.cmake | 60 ------------------------- 3 files changed, 88 deletions(-) delete mode 100644 cmake/DownloadNonCMakeCMakeLists.txt.in delete mode 100644 cmake/package_helpers.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 94e0c6318ae..706006000c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -255,7 +255,6 @@ if(GINKGO_BUILD_MPI) endif() # Try to find the third party packages before using our subdirectories -include(cmake/package_helpers.cmake) if(GINKGO_BUILD_TESTS) find_package(GTest 1.10.0) # No need for QUIET as CMake ships FindGTest endif() diff --git a/cmake/DownloadNonCMakeCMakeLists.txt.in b/cmake/DownloadNonCMakeCMakeLists.txt.in deleted file mode 100644 index 55e2f833985..00000000000 --- a/cmake/DownloadNonCMakeCMakeLists.txt.in +++ /dev/null @@ -1,27 +0,0 @@ -cmake_minimum_required(VERSION 3.16) -project(${package_name}) - -include(ExternalProject) -ExternalProject_Add(${package_name} - URL "${package_url}" - URL_HASH 
"${package_hash}" - DOWNLOAD_NO_PROGRESS TRUE - SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/src" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - UPDATE_DISCONNECTED ${GINKGO_SKIP_DEPENDENCY_UPDATE} - ) - -ExternalProject_Add_Step(${package_name} custom_configure - COMMAND "${config_command}" "${ARGN}" - WORKING_DIRECTORY "${working_dir}" - DEPENDEES download) -ExternalProject_Add_Step(${package_name} custom_build - COMMAND make - WORKING_DIRECTORY "${working_dir}" - DEPENDEES custom_configure) -ExternalProject_Add_Step(${package_name} custom_install - COMMAND make all install - WORKING_DIRECTORY "${working_dir}" - DEPENDEES custom_build) diff --git a/cmake/package_helpers.cmake b/cmake/package_helpers.cmake deleted file mode 100644 index 1abc1a72587..00000000000 --- a/cmake/package_helpers.cmake +++ /dev/null @@ -1,60 +0,0 @@ -set(NON_CMAKE_PACKAGE_DOWNLOADER_SCRIPT - "${CMAKE_CURRENT_LIST_DIR}/DownloadNonCMakeCMakeLists.txt.in") - - -# Load a package from the url provided and run configure (Non-CMake projects) -# -# \param package_name Name of the package -# \param package_url Url of the package -# \param package_tag Tag or version of the package to be downloaded. -# \param working_dir The directory where the configure/build should happen. -# \param config_command The command for the configuration step. -# -function(ginkgo_load_and_configure_package package_name package_url package_hash working_dir config_command) - set(GINKGO_THIRD_PARTY_BUILD_TYPE "Debug") - if (CMAKE_BUILD_TYPE MATCHES "[Rr][Ee][Ll][Ee][Aa][Ss][Ee]") - set(GINKGO_THIRD_PARTY_BUILD_TYPE "Release") - endif() - configure_file(${NON_CMAKE_PACKAGE_DOWNLOADER_SCRIPT} - download/CMakeLists.txt) - set(TOOLSET "") - if (NOT "${CMAKE_GENERATOR_TOOLSET}" STREQUAL "") - set(TOOLSET "-T${CMAKE_GENERATOR_TOOLSET}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" "${TOOLSET}" . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/download) - if(result) - message(FATAL_ERROR - "CMake step for ${package_name}/download failed: ${result}") - return() - endif() - execute_process(COMMAND ${CMAKE_COMMAND} --build . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/download) - if(result) - message(FATAL_ERROR - "Build step for ${package_name}/download failed: ${result}") - return() - endif() -endfunction() - - -# Download a file and verify the download -# -# \param url The url of file to be downloaded -# \param filename The name of the file -# \param hash_type The type of hash, See CMake file() documentation for more details. -# \param hash The hash itself, See CMake file() documentation for more details. 
-# -function(ginkgo_download_file url filename hash_type hash) - file(DOWNLOAD ${url} ${filename} - TIMEOUT 60 # seconds - EXPECTED_HASH "${hash_type}=${hash}" - TLS_VERIFY ON) - if(EXISTS ${filename}) - message(STATUS "${filename} downloaded from ${url}") - else() - message(FATAL_ERROR "Download of ${filename} failed.") - endif() -endfunction(ginkgo_download_file) From e4fd30ac66087f455708b77a6cbdbc619c4ee44c Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 8 Aug 2023 18:44:13 +0200 Subject: [PATCH 182/583] Also drop RPATH management for hwloc and PAPI --- cmake/information_helpers.cmake | 7 +------ cmake/install_helpers.cmake | 4 ---- core/CMakeLists.txt | 2 -- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/cmake/information_helpers.cmake b/cmake/information_helpers.cmake index cef920a09ce..7ac7fdfeda5 100644 --- a/cmake/information_helpers.cmake +++ b/cmake/information_helpers.cmake @@ -78,12 +78,7 @@ macro(ginkgo_interface_libraries_recursively INTERFACE_LIBS) list(TRANSFORM GINKGO_LIBS_INTERFACE_LIBS REPLACE "\\$" "\\1") ginkgo_interface_libraries_recursively("${GINKGO_LIBS_INTERFACE_LIBS}") elseif(EXISTS "${_libs}") - if ("${_libs}" MATCHES "${PROJECT_BINARY_DIR}.*(papi|sde|pfm|hwloc).so") - get_filename_component(_lib_name "${_libs}" NAME) - list(APPEND GINKGO_INTERFACE_LIBS_FOUND "${CMAKE_INSTALL_FULL_LIBDIR}/${_lib_name}") - else() - list(APPEND GINKGO_INTERFACE_LIBS_FOUND "${_libs}") - endif() + list(APPEND GINKGO_INTERFACE_LIBS_FOUND "${_libs}") elseif("${_libs}" STREQUAL "${CMAKE_DL_LIBS}") list(APPEND GINKGO_INTERFACE_LIBS_FOUND "-l${_libs}") endif() diff --git a/cmake/install_helpers.cmake b/cmake/install_helpers.cmake index 8bec34d7a41..601fc89a3db 100644 --- a/cmake/install_helpers.cmake +++ b/cmake/install_helpers.cmake @@ -30,10 +30,6 @@ function(ginkgo_add_install_rpath name) endif() if (GINKGO_INSTALL_RPATH_DEPENDENCIES) set(RPATH_DEPENDENCIES "${ARGN}") - if(GINKGO_HAVE_HWLOC AND HWLOC_FOUND) - get_filename_component(HWLOC_LIB_PATH ${HWLOC_LIBRARIES} DIRECTORY) - list(APPEND RPATH_DEPENDENCIES "${HWLOC_LIBRARIES}") - endif() endif() if (GINKGO_INSTALL_RPATH) set_property(TARGET "${name}" PROPERTY INSTALL_RPATH diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index e7c2bf7ce45..8ec4502d9c7 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -114,8 +114,6 @@ set(GKO_RPATH_ADDITIONS "") if(GINKGO_HAVE_PAPI_SDE) target_link_libraries(ginkgo PUBLIC PAPI::PAPI_SDE) - get_filename_component(GKO_PAPI_LIBDIR "${PAPI_SDE_LIBRARIES}" DIRECTORY) - list(APPEND GKO_RPATH_ADDITIONS "${GKO_PAPI_LIBDIR}") endif() if(GINKGO_HAVE_TAU) From 62f1ff75d4142ab865e203f86844a66170c2f024 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 8 Aug 2023 18:52:26 +0200 Subject: [PATCH 183/583] Automatically detect HWLOC/PAPI. Notify force disabled. 
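Because the defaults of GINKGO_BUILD_HWLOC and GINKGO_BUILD_PAPI_SDE are now derived from what the configure step detects, two otherwise identical source trees can produce builds with different optional features. A small illustrative check (not part of the patch) that downstream code can use to see what a given installation was configured with; it assumes the GKO_HAVE_HWLOC and GKO_HAVE_PAPI_SDE macros exported through the installed ginkgo/config.hpp:

    #include <ginkgo/config.hpp>

    #include <iostream>

    int main()
    {
        // both macros expand to 0 or 1 depending on the detected system libraries
        std::cout << "hwloc support:    " << GKO_HAVE_HWLOC << '\n'
                  << "PAPI SDE support: " << GKO_HAVE_PAPI_SDE << '\n';
    }
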
--- CMakeLists.txt | 21 +++++++++++---------- cmake/autodetect_system_libs.cmake | 7 +++++++ 2 files changed, 18 insertions(+), 10 deletions(-) create mode 100644 cmake/autodetect_system_libs.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 706006000c9..3306f1b9ac7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,7 @@ include(cmake/hip_path.cmake) include(cmake/autodetect_executors.cmake) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules/") +include(cmake/autodetect_system_libs.cmake) # Ginkgo configuration options option(GINKGO_DEVEL_TOOLS "Add development tools to the build system" OFF) @@ -77,9 +78,9 @@ option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON) if(MSVC OR WIN32 OR CYGWIN OR APPLE) option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is OFF. Ginkgo does not support HWLOC on Windows/MacOS" OFF) else() - option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Default is ON. A system HWLOC is required, otherwise HWLOC support will be disabled." ON) + option(GINKGO_BUILD_HWLOC "Build Ginkgo with HWLOC. Enabled if a system installation is found." ${HWLOC_FOUND}) endif() -option(GINKGO_BUILD_PAPI_SDE "Build Ginkgo with PAPI SDE. Default is ON. Requires a system package." ON) +option(GINKGO_BUILD_PAPI_SDE "Build Ginkgo with PAPI SDE. Enabled if a system installation is found." ${PAPI_SDE_FOUND}) option(GINKGO_DPCPP_SINGLE_MODE "Do not compile double kernels for the DPC++ backend." OFF) option(GINKGO_INSTALL_RPATH "Set the RPATH when installing its libraries." ON) option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) to the installation RPATH." ON) @@ -190,13 +191,6 @@ endif() include(CheckIncludeFileCXX) check_include_file_cxx(cxxabi.h GKO_HAVE_CXXABI_H) -# Automatically find PAPI and search for the required 'sde' component -set(GINKGO_HAVE_PAPI_SDE 0) -find_package(PAPI OPTIONAL_COMPONENTS sde) -if(PAPI_sde_FOUND) - set(GINKGO_HAVE_PAPI_SDE 1) -endif() - # Automatically find TAU set(GINKGO_HAVE_TAU 0) find_package(PerfStubs QUIET) @@ -269,13 +263,20 @@ if(GINKGO_BUILD_HWLOC) find_package(HWLOC 2.1) if (HWLOC_FOUND) set(GINKGO_HAVE_HWLOC 1) + else() + message(WARNING "HWLOC could not be found. HWLOC support will be disabled.") + set(GINKGO_BUILD_HWLOC OFF CACHE BOOL "HWLOC support was disabled because a system package could not be found." FORCE) endif() endif() + set(GINKGO_HAVE_PAPI_SDE 0) if(GINKGO_BUILD_PAPI_SDE) find_package(PAPI 7.0.1.0 COMPONENTS sde) - if (PAPI_FOUND AND PAPI_SDE_FOUND) + if (PAPI_SDE_FOUND) set(GINKGO_HAVE_PAPI_SDE 1) + else() + message(WARNING "PAPI (SDE) could not be found. PAPI_SDE support will be disabled.") + set(GINKGO_BUILD_PAPI_SDE OFF CACHE BOOL "PAPI_SDE support was disabled because a system package could not be found." FORCE) endif() endif() diff --git a/cmake/autodetect_system_libs.cmake b/cmake/autodetect_system_libs.cmake new file mode 100644 index 00000000000..6f59a759aa8 --- /dev/null +++ b/cmake/autodetect_system_libs.cmake @@ -0,0 +1,7 @@ +if (NOT DEFINED GINKGO_BUILD_HWLOC) + find_package(HWLOC 2.1) +endif() + +if (NOT DEFINED GINKGO_BUILD_PAPI_SDE) + find_package(PAPI 7.0.1.0 COMPONENTS sde) +endif() From b22c0452af0bcc089e461e275698a025ccab838c Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. 
Tsai" Date: Mon, 14 Aug 2023 17:08:50 +0200 Subject: [PATCH 184/583] set the pthread preference first --- CMakeLists.txt | 4 +++- cmake/GinkgoConfig.cmake.in | 3 +++ core/base/mixed_precision_types.hpp | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3306f1b9ac7..26bc992c457 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,8 @@ cmake_minimum_required(VERSION 3.16) project(Ginkgo LANGUAGES C CXX VERSION 1.7.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures") set(Ginkgo_VERSION_TAG "master") set(PROJECT_VERSION_TAG ${Ginkgo_VERSION_TAG}) +# Cuda and Hip also look for Threads. Set it before any find_package to ensure the Threads setting is not changed. +set(THREADS_PREFER_PTHREAD_FLAG ON) # Determine which modules can be compiled include(cmake/hip_path.cmake) @@ -98,7 +100,7 @@ endif() if(GINKGO_BUILD_OMP) find_package(OpenMP 3.0 REQUIRED) endif() -set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) include(cmake/build_type_helpers.cmake) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 0d7ce5455f1..352cf1dde8d 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -139,6 +139,9 @@ set(GINKGO_HAVE_VTUNE "@GINKGO_HAVE_VTUNE@") set(GINKGO_HAVE_METIS "@GINKGO_HAVE_METIS@") set(VTune_PATH "@VTune_PATH@") +# ensure Threads settings +set(THREADS_PREFER_PTHREAD_FLAG ON) + # NOTE: we do not export benchmarks, examples, tests or devel tools # so `third_party` libraries are currently unneeded. diff --git a/core/base/mixed_precision_types.hpp b/core/base/mixed_precision_types.hpp index 9579caaac4f..b5c1e37569b 100644 --- a/core/base/mixed_precision_types.hpp +++ b/core/base/mixed_precision_types.hpp @@ -75,7 +75,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(_macro, ...) \ - template _macro(float, float, float, __VA_ARGS__); + template _macro(float, float, float, __VA_ARGS__) #define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(_macro, ...) \ template _macro(double, double, double, __VA_ARGS__) From a4b91c65cf59de4a85b92424618b93d3a5b3bc12 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 23 Feb 2022 13:43:03 +0100 Subject: [PATCH 185/583] add build_from_local_range to partition --- core/distributed/partition.cpp | 35 ++++++++++++++++++- include/ginkgo/core/distributed/partition.hpp | 16 +++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index a1db99396e7..ac8f6c7fe28 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -32,7 +32,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
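// Illustration (not part of the original patch): build_from_local_range,
// added further down in this file, gathers every rank's
// [local_start, local_end) pair and collapses the pairs into the contiguous
// range offsets expected by build_from_contiguous. For example, with three
// ranks owning [0, 10), [10, 25) and [25, 30), the all_gather yields
// {0, 10, 10, 25, 25, 30}; after checking that each end matches the next
// rank's start, this is compressed to the offsets {0, 10, 25, 30}.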
#include - #include "core/distributed/partition_kernels.hpp" @@ -90,6 +89,40 @@ Partition::build_from_contiguous( } +template +std::unique_ptr> +Partition::build_from_local_range( + std::shared_ptr exec, LocalIndexType local_start, + LocalIndexType local_end, std::shared_ptr comm) +{ + GlobalIndexType range[2] = {static_cast(local_start), + static_cast(local_end)}; + + // make all range_ends available on each rank + Array ranges_start_end(exec->get_master(), + comm->size() * 2); + ranges_start_end.fill(0); + // comm->all_gather(range, 2, ranges_start_end.get_data(), 2); + mpi::all_gather(range, 2, ranges_start_end.get_data(), 2, comm); + + // remove duplicates + Array ranges(exec->get_master(), comm->size() + 1); + auto ranges_se_data = ranges_start_end.get_const_data(); + ranges.get_data()[0] = ranges_se_data[0]; + for (int i = 1; i < ranges_start_end.get_num_elems() - 1; i += 2) { + GKO_ASSERT_EQ(ranges_se_data[i], ranges_se_data[i + 1]); + ranges.get_data()[i / 2 + 1] = ranges_se_data[i]; + } + ranges.get_data()[ranges.get_num_elems() - 1] = + ranges_se_data[ranges_start_end.get_num_elems() - 1]; + + // move data to correct executor + ranges.set_executor(exec); + + return Partition::build_from_contiguous(exec, ranges); +} + + template std::unique_ptr> Partition::build_from_global_size_uniform( diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index 0096edf999c..c753c1beb3d 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include @@ -285,6 +286,21 @@ class Partition std::shared_ptr exec, comm_index_type num_parts, global_index_type global_size); + /** + * Builds a partition from the local range + * + * @param exec the Executor on which the partition should be built + * @param local_start the start index of the local range + * @param local_end the end index of the local range + * + * @return a Partition where each range has the individual local_start + * and local_ends + */ + static std::unique_ptr build_from_local_range( + std::shared_ptr exec, local_index_type local_start, + local_index_type local_end, + std::shared_ptr comm); + private: /** * Creates a partition stored on the given executor with the given number of From f3aa670260f6b6bbf7d388cbcb9e48a29a26cbe8 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 23 Feb 2022 14:40:18 +0100 Subject: [PATCH 186/583] move build_from_local_range into its own header --- core/CMakeLists.txt | 1 + core/distributed/partition.cpp | 34 -------- core/distributed/partition_helpers.cpp | 86 +++++++++++++++++++ include/ginkgo/core/distributed/partition.hpp | 16 ---- .../core/distributed/partition_helpers.hpp | 78 +++++++++++++++++ include/ginkgo/ginkgo.hpp | 1 + 6 files changed, 166 insertions(+), 50 deletions(-) create mode 100644 core/distributed/partition_helpers.cpp create mode 100644 include/ginkgo/core/distributed/partition_helpers.hpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 8ec4502d9c7..7932976d6c9 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -95,6 +95,7 @@ if(GINKGO_BUILD_MPI) PRIVATE mpi/exception.cpp distributed/matrix.cpp + distributed/partition_helpers.cpp distributed/vector.cpp distributed/preconditioner/schwarz.cpp) endif() diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index ac8f6c7fe28..c6e5bfc5fe0 100644 --- 
a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -89,40 +89,6 @@ Partition::build_from_contiguous( } -template -std::unique_ptr> -Partition::build_from_local_range( - std::shared_ptr exec, LocalIndexType local_start, - LocalIndexType local_end, std::shared_ptr comm) -{ - GlobalIndexType range[2] = {static_cast(local_start), - static_cast(local_end)}; - - // make all range_ends available on each rank - Array ranges_start_end(exec->get_master(), - comm->size() * 2); - ranges_start_end.fill(0); - // comm->all_gather(range, 2, ranges_start_end.get_data(), 2); - mpi::all_gather(range, 2, ranges_start_end.get_data(), 2, comm); - - // remove duplicates - Array ranges(exec->get_master(), comm->size() + 1); - auto ranges_se_data = ranges_start_end.get_const_data(); - ranges.get_data()[0] = ranges_se_data[0]; - for (int i = 1; i < ranges_start_end.get_num_elems() - 1; i += 2) { - GKO_ASSERT_EQ(ranges_se_data[i], ranges_se_data[i + 1]); - ranges.get_data()[i / 2 + 1] = ranges_se_data[i]; - } - ranges.get_data()[ranges.get_num_elems() - 1] = - ranges_se_data[ranges_start_end.get_num_elems() - 1]; - - // move data to correct executor - ranges.set_executor(exec); - - return Partition::build_from_contiguous(exec, ranges); -} - - template std::unique_ptr> Partition::build_from_global_size_uniform( diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp new file mode 100644 index 00000000000..8fdd6cc0634 --- /dev/null +++ b/core/distributed/partition_helpers.cpp @@ -0,0 +1,86 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include +#include + + +namespace gko { +namespace distributed { + + +template +std::unique_ptr> +build_partition_from_local_range(std::shared_ptr exec, + LocalIndexType local_start, + LocalIndexType local_end, + mpi::communicator comm) +{ + GlobalIndexType range[2] = {static_cast(local_start), + static_cast(local_end)}; + + // make all range_ends available on each rank + Array ranges_start_end(exec->get_master(), + comm.size() * 2); + ranges_start_end.fill(0); + comm.all_gather(range, 2, ranges_start_end.get_data(), 2); + + // remove duplicates + Array ranges(exec->get_master(), comm.size() + 1); + auto ranges_se_data = ranges_start_end.get_const_data(); + ranges.get_data()[0] = ranges_se_data[0]; + for (int i = 1; i < ranges_start_end.get_num_elems() - 1; i += 2) { + GKO_ASSERT_EQ(ranges_se_data[i], ranges_se_data[i + 1]); + ranges.get_data()[i / 2 + 1] = ranges_se_data[i]; + } + ranges.get_data()[ranges.get_num_elems() - 1] = + ranges_se_data[ranges_start_end.get_num_elems() - 1]; + + // move data to correct executor + ranges.set_executor(exec); + + return Partition::build_from_contiguous( + exec, ranges); +} + +#define GKO_DECLARE_BUILD_PARTITION_FROM_LOCAL_RANGE(_local_type, \ + _global_type) \ + std::unique_ptr> \ + build_partition_from_local_range( \ + std::shared_ptr exec, _local_type local_start, \ + _local_type local_end, mpi::communicator comm) +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_BUILD_PARTITION_FROM_LOCAL_RANGE); + + +} // namespace distributed +} // namespace gko diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index c753c1beb3d..0096edf999c 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include #include @@ -286,21 +285,6 @@ class Partition std::shared_ptr exec, comm_index_type num_parts, global_index_type global_size); - /** - * Builds a partition from the local range - * - * @param exec the Executor on which the partition should be built - * @param local_start the start index of the local range - * @param local_end the end index of the local range - * - * @return a Partition where each range has the individual local_start - * and local_ends - */ - static std::unique_ptr build_from_local_range( - std::shared_ptr exec, local_index_type local_start, - local_index_type local_end, - std::shared_ptr comm); - private: /** * Creates a partition stored on the given executor with the given number of diff --git a/include/ginkgo/core/distributed/partition_helpers.hpp b/include/ginkgo/core/distributed/partition_helpers.hpp new file mode 100644 index 00000000000..4439d8311e0 --- /dev/null +++ b/include/ginkgo/core/distributed/partition_helpers.hpp @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_DISTRIBUTED_PARTITION_HELPERS_HPP_ +#define GKO_PUBLIC_CORE_DISTRIBUTED_PARTITION_HELPERS_HPP_ + + +#include + + +#if GINKGO_BUILD_MPI + + +#include + + +namespace gko { +namespace distributed { + +template +class Partition; + + +/** + * Builds a partition from the local range + * + * @param exec the Executor on which the partition should be built + * @param local_start the start index of the local range + * @param local_end the end index of the local range + * + * @return a Partition where each range has the individual local_start + * and local_ends + */ +template +std::unique_ptr> +build_partition_from_local_range(std::shared_ptr exec, + LocalIndexType local_start, + LocalIndexType local_end, + mpi::communicator comm); + + +} // namespace distributed +} // namespace gko + + +#endif + + +#endif // GKO_PUBLIC_CORE_DISTRIBUTED_PARTITION_HELPERS_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 179a8a01a46..594ad880b8c 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -82,6 +82,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include From bb844ffd4767b6b1ef65d2506d12ec1cf80604e1 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 5 Dec 2022 11:47:58 +0100 Subject: [PATCH 187/583] fixups after rebase --- core/distributed/partition_helpers.cpp | 5 ++++- .../ginkgo/core/distributed/partition_helpers.hpp | 12 +++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 8fdd6cc0634..24f129db36f 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { +namespace experimental { namespace distributed { @@ -52,7 +53,8 @@ build_partition_from_local_range(std::shared_ptr exec, Array ranges_start_end(exec->get_master(), comm.size() * 2); ranges_start_end.fill(0); - comm.all_gather(range, 2, ranges_start_end.get_data(), 2); + comm.all_gather(exec->get_master(), range, 2, ranges_start_end.get_data(), + 2); // remove duplicates Array ranges(exec->get_master(), comm.size() + 1); @@ -83,4 +85,5 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( } // namespace distributed +} // namespace experimental } // namespace gko diff --git a/include/ginkgo/core/distributed/partition_helpers.hpp b/include/ginkgo/core/distributed/partition_helpers.hpp index 4439d8311e0..1433953c738 100644 --- a/include/ginkgo/core/distributed/partition_helpers.hpp +++ b/include/ginkgo/core/distributed/partition_helpers.hpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { +namespace experimental { namespace distributed { template @@ -51,14 +52,14 @@ class Partition; /** - * Builds a partition from the local range + * Builds a partition from a local range. * - * @param exec the Executor on which the partition should be built - * @param local_start the start index of the local range - * @param local_end the end index of the local range + * @param exec the Executor on which the partition should be built. + * @param local_start the start index of the local range. + * @param local_end the end index of the local range. * * @return a Partition where each range has the individual local_start - * and local_ends + * and local_ends. */ template std::unique_ptr> @@ -69,6 +70,7 @@ build_partition_from_local_range(std::shared_ptr exec, } // namespace distributed +} // namespace experimental } // namespace gko From 149e479d2ca6a11c81eed3375ca16de4198aa7e9 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 5 Dec 2022 11:53:33 +0100 Subject: [PATCH 188/583] use span for local range --- core/distributed/partition_helpers.cpp | 21 ++++++++----------- .../core/distributed/partition_helpers.hpp | 5 ++--- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 24f129db36f..2f764c2c478 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -42,14 +42,12 @@ namespace distributed { template std::unique_ptr> build_partition_from_local_range(std::shared_ptr exec, - LocalIndexType local_start, - LocalIndexType local_end, - mpi::communicator comm) + span local_range, mpi::communicator comm) { - GlobalIndexType range[2] = {static_cast(local_start), - static_cast(local_end)}; + GlobalIndexType range[2] = {static_cast(local_range.begin), + static_cast(local_range.end)}; - // make all range_ends available on each rank + // make all range_start_ends available on each rank Array ranges_start_end(exec->get_master(), comm.size() * 2); ranges_start_end.fill(0); @@ -74,12 +72,11 @@ build_partition_from_local_range(std::shared_ptr exec, exec, ranges); } -#define GKO_DECLARE_BUILD_PARTITION_FROM_LOCAL_RANGE(_local_type, \ - _global_type) \ - std::unique_ptr> \ - build_partition_from_local_range( \ - std::shared_ptr exec, _local_type local_start, \ - _local_type local_end, mpi::communicator comm) +#define GKO_DECLARE_BUILD_PARTITION_FROM_LOCAL_RANGE(_local_type, \ + _global_type) \ + std::unique_ptr> \ + build_partition_from_local_range(std::shared_ptr exec, \ + span local_range, mpi::communicator comm) 
GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_BUILD_PARTITION_FROM_LOCAL_RANGE); diff --git a/include/ginkgo/core/distributed/partition_helpers.hpp b/include/ginkgo/core/distributed/partition_helpers.hpp index 1433953c738..01bc1cc1a18 100644 --- a/include/ginkgo/core/distributed/partition_helpers.hpp +++ b/include/ginkgo/core/distributed/partition_helpers.hpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "ginkgo/core/base/range.hpp" namespace gko { @@ -64,9 +65,7 @@ class Partition; template std::unique_ptr> build_partition_from_local_range(std::shared_ptr exec, - LocalIndexType local_start, - LocalIndexType local_end, - mpi::communicator comm); + span local_range, mpi::communicator comm); } // namespace distributed From 9718e48c16dfd7bcac21adc6ed45b1ba09990860 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 5 Dec 2022 12:33:27 +0100 Subject: [PATCH 189/583] adds kernel to remove duplicate start/ends --- common/unified/CMakeLists.txt | 1 + .../distributed/partition_helpers_kernels.cpp | 42 +++++++++++++++++++ core/device_hooks/common_kernels.inc.cpp | 10 +++++ core/distributed/partition_helpers.cpp | 38 ++++++++++------- .../distributed/partition_helpers_kernels.hpp | 37 ++++++++++++++++ reference/CMakeLists.txt | 1 + .../distributed/partition_helpers_kernels.cpp | 33 +++++++++++++++ 7 files changed, 146 insertions(+), 16 deletions(-) create mode 100644 common/unified/distributed/partition_helpers_kernels.cpp create mode 100644 core/distributed/partition_helpers_kernels.hpp create mode 100644 reference/distributed/partition_helpers_kernels.cpp diff --git a/common/unified/CMakeLists.txt b/common/unified/CMakeLists.txt index 5a37eb022f9..67fc839d6a7 100644 --- a/common/unified/CMakeLists.txt +++ b/common/unified/CMakeLists.txt @@ -6,6 +6,7 @@ set(UNIFIED_SOURCES components/format_conversion_kernels.cpp components/precision_conversion_kernels.cpp components/reduce_array_kernels.cpp + distributed/partition_helpers_kernels.cpp distributed/partition_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp new file mode 100644 index 00000000000..d5f4f407cd5 --- /dev/null +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -0,0 +1,42 @@ + +#include "core/distributed/partition_helpers_kernels.hpp" + + +#include "common/unified/base/kernel_launch.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +namespace partition_helpers { + + +template +void compress_start_ends(std::shared_ptr exec, + const array& range_start_ends, + array& ranges) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto size, const auto* range_start_ends, + auto* ranges) { + if (i == 0) { + ranges[0] = range_start_ends[0]; + } + if (i != size - 1) { + ranges[i + 1] = range_start_ends[2 * i + 1]; + } + }, + ranges.get_num_elems() - 1, ranges.get_num_elems(), + range_start_ends.get_const_data(), ranges.get_data()); +} + + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS); + + +} // namespace partition_helpers +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 0f898b3ae73..519376dae11 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -45,6 +45,7 @@ OF THIS 
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/prefix_sum_kernels.hpp" #include "core/components/reduce_array_kernels.hpp" #include "core/distributed/matrix_kernels.hpp" +#include "core/distributed/partition_helpers_kernels.hpp" #include "core/distributed/partition_kernels.hpp" #include "core/distributed/vector_kernels.hpp" #include "core/factorization/cholesky_kernels.hpp" @@ -255,6 +256,15 @@ GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); } // namespace partition +namespace partition_helpers { + + +GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS); + + +} + + namespace distributed_vector { diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 2f764c2c478..3a46461bcf0 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -34,9 +34,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/distributed/partition_helpers_kernels.hpp" + + namespace gko { namespace experimental { namespace distributed { +namespace partition_helpers { +namespace { + + +GKO_REGISTER_OPERATION(compress_start_ends, + partition_helpers::compress_start_ends); + + +} +} // namespace partition_helpers template @@ -48,25 +61,18 @@ build_partition_from_local_range(std::shared_ptr exec, static_cast(local_range.end)}; // make all range_start_ends available on each rank - Array ranges_start_end(exec->get_master(), - comm.size() * 2); + auto mpi_exec = (exec == exec->get_master() || mpi::is_gpu_aware()) + ? exec + : exec->get_master(); + array ranges_start_end(mpi_exec, comm.size() * 2); ranges_start_end.fill(0); - comm.all_gather(exec->get_master(), range, 2, ranges_start_end.get_data(), - 2); + comm.all_gather(mpi_exec, range, 2, ranges_start_end.get_data(), 2); + ranges_start_end.set_executor(exec); // remove duplicates - Array ranges(exec->get_master(), comm.size() + 1); - auto ranges_se_data = ranges_start_end.get_const_data(); - ranges.get_data()[0] = ranges_se_data[0]; - for (int i = 1; i < ranges_start_end.get_num_elems() - 1; i += 2) { - GKO_ASSERT_EQ(ranges_se_data[i], ranges_se_data[i + 1]); - ranges.get_data()[i / 2 + 1] = ranges_se_data[i]; - } - ranges.get_data()[ranges.get_num_elems() - 1] = - ranges_se_data[ranges_start_end.get_num_elems() - 1]; - - // move data to correct executor - ranges.set_executor(exec); + array ranges(exec, comm.size() + 1); + exec->run( + partition_helpers::make_compress_start_ends(ranges_start_end, ranges)); return Partition::build_from_contiguous( exec, ranges); diff --git a/core/distributed/partition_helpers_kernels.hpp b/core/distributed/partition_helpers_kernels.hpp new file mode 100644 index 00000000000..374fedf8c1a --- /dev/null +++ b/core/distributed/partition_helpers_kernels.hpp @@ -0,0 +1,37 @@ +#ifndef GINKGO_PARTITION_HELPERS_KERNELS_HPP +#define GINKGO_PARTITION_HELPERS_KERNELS_HPP + + +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS(_type) \ + void compress_start_ends(std::shared_ptr exec, \ + const array<_type>& range_start_ends, \ + array<_type>& ranges) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS(GlobalIndexType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(partition_helpers, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // 
namespace gko + + +#endif // GINKGO_PARTITION_HELPERS_KERNELS_HPP diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 074d5efe818..dd54e3fb52f 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -13,6 +13,7 @@ target_sources(ginkgo_reference components/precision_conversion_kernels.cpp components/prefix_sum_kernels.cpp distributed/matrix_kernels.cpp + distributed/partition_helpers_kernels.cpp distributed/partition_kernels.cpp distributed/vector_kernels.cpp factorization/cholesky_kernels.cpp diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp new file mode 100644 index 00000000000..0451e82e10b --- /dev/null +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -0,0 +1,33 @@ + +#include "core/distributed/partition_helpers_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +namespace partition_helpers { + + +template +void compress_start_ends(std::shared_ptr exec, + const array& range_start_ends, + array& ranges) +{ + if (ranges.get_num_elems()) { + ranges.get_data()[0] = range_start_ends.get_const_data()[0]; + for (size_type i = 0; i < ranges.get_num_elems() - 1; ++i) { + ranges.get_data()[i + 1] = + range_start_ends.get_const_data()[2 * i + 1]; + } + } +} + + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS); + + +} // namespace partition_helpers +} // namespace reference +} // namespace kernels +} // namespace gko From ebda15c9e989d38383307830bf9bec15741fc144 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 5 Dec 2022 12:55:27 +0100 Subject: [PATCH 190/583] adds test for removing duplicate start/ends --- test/distributed/CMakeLists.txt | 1 + test/distributed/partition_helper_kernels.cpp | 98 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 test/distributed/partition_helper_kernels.cpp diff --git a/test/distributed/CMakeLists.txt b/test/distributed/CMakeLists.txt index 1c8e9b1e8fc..32b3810ea31 100644 --- a/test/distributed/CMakeLists.txt +++ b/test/distributed/CMakeLists.txt @@ -1,3 +1,4 @@ ginkgo_create_common_test(matrix_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(partition_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(vector_kernels DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_and_reference_test(partition_helper_kernels) diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp new file mode 100644 index 00000000000..c52ba65e5c7 --- /dev/null +++ b/test/distributed/partition_helper_kernels.cpp @@ -0,0 +1,98 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/distributed/partition_helpers_kernels.hpp" + + +#include +#include + + +#include + + +#include "core/test/utils.hpp" +#include "test/utils/executor.hpp" + + +using comm_index_type = gko::experimental::distributed::comm_index_type; + + +template +class PartitionHelpers : public CommonTestFixture { +protected: + using index_type = IndexType; +}; + +TYPED_TEST_SUITE(PartitionHelpers, gko::test::IndexTypes); + + +TYPED_TEST(PartitionHelpers, CanCompressStartEndsWithOneRange) +{ + using itype = typename TestFixture::index_type; + gko::array start_ends{this->exec, {0, 3}}; + gko::array expects{this->exec, {0, 3}}; + gko::array result{this->exec, expects.get_num_elems()}; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::compress_start_ends( + this->exec, start_ends, result); + + GKO_ASSERT_ARRAY_EQ(result, expects); +} + + +TYPED_TEST(PartitionHelpers, CanCompressStartEndsWithMultipleRanges) +{ + using itype = typename TestFixture::index_type; + gko::array start_ends{this->exec, {0, 3, 3, 7, 7, 10}}; + gko::array expects{this->exec, {0, 3, 7, 10}}; + gko::array result{this->exec, expects.get_num_elems()}; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::compress_start_ends( + this->exec, start_ends, result); + + GKO_ASSERT_ARRAY_EQ(result, expects); +} + + +TYPED_TEST(PartitionHelpers, CanCompressStartEndsWithZeroRange) +{ + using itype = typename TestFixture::index_type; + gko::array start_ends{this->exec}; + gko::array expects{this->exec, {0}}; + gko::array result{this->exec, {0}}; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::compress_start_ends( + this->exec, start_ends, result); + + GKO_ASSERT_ARRAY_EQ(result, expects); +} From cf7429d74d25e7e5fe0f4ca2e388ffc891e90fb7 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 5 Dec 2022 13:25:23 +0100 Subject: [PATCH 191/583] adds tests for build_from_local_range --- test/mpi/partition_helpers.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 test/mpi/partition_helpers.cpp diff --git a/test/mpi/partition_helpers.cpp b/test/mpi/partition_helpers.cpp new file mode 100644 index 00000000000..303d7362856 --- /dev/null +++ b/test/mpi/partition_helpers.cpp @@ -0,0 +1,28 @@ +#include +#include + + +#include "core/test/utils.hpp" +#include "test/utils/mpi/executor.hpp" + + +template +class PartitionHelpers : public CommonMpiTestFixture{ +protected: + using index_type = IndexType; + +}; + +TYPED_TEST_SUITE(PartitionHelpers, gko::test::IndexTypes); + + +TYPED_TEST(PartitionHelpers, CanBuildFromLocalRanges){ + using itype = typename TestFixture::index_type ; + gko::span local_range[] = {{0u, 4u}, {4u, 9u}, {9u, 11u}}; + gko::array expects{this->exec, {0, 4, 9, 11}}; + + 
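    // build the partition collectively: every rank passes its own contiguous local range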
auto part = gko::experimental::distributed::build_partition_from_local_range(this->exec, local_range[this->comm.rank()], this->comm); + + GKO_ASSERT_ARRAY_EQ(expects, + gko::make_const_array_view(this->exec, expects.get_num_elems(), part->get_range_bounds())); +} From 5f9a359a5d738bbd567770cea867bf6ed4eefa5d Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 5 Dec 2022 13:25:53 +0100 Subject: [PATCH 192/583] adds note on invalid inputs --- include/ginkgo/core/distributed/partition_helpers.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/ginkgo/core/distributed/partition_helpers.hpp b/include/ginkgo/core/distributed/partition_helpers.hpp index 01bc1cc1a18..93b04af7f6c 100644 --- a/include/ginkgo/core/distributed/partition_helpers.hpp +++ b/include/ginkgo/core/distributed/partition_helpers.hpp @@ -56,8 +56,11 @@ class Partition; * Builds a partition from a local range. * * @param exec the Executor on which the partition should be built. - * @param local_start the start index of the local range. - * @param local_end the end index of the local range. + * @param local_range the start and end indices of the local range + * + * @warning The local ranges have to be continuous and ascending. This means + * that for a process `i` with `range[i] = [s_i, e_i)` then for process + * `j = i+1` `range[j] = [s_j = e_i, e_j)`. * * @return a Partition where each range has the individual local_start * and local_ends. @@ -73,7 +76,5 @@ build_partition_from_local_range(std::shared_ptr exec, } // namespace gko -#endif - - +#endif // GINKGO_BUILD_MPI #endif // GKO_PUBLIC_CORE_DISTRIBUTED_PARTITION_HELPERS_HPP_ From 2c9a0597b6d134ccae69ad27316379426b0f902b Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 5 Dec 2022 14:09:44 +0100 Subject: [PATCH 193/583] fixes reference kernel --- reference/distributed/partition_helpers_kernels.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 0451e82e10b..0060d20be10 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -13,7 +13,7 @@ void compress_start_ends(std::shared_ptr exec, const array& range_start_ends, array& ranges) { - if (ranges.get_num_elems()) { + if (ranges.get_num_elems() && range_start_ends.get_num_elems()) { ranges.get_data()[0] = range_start_ends.get_const_data()[0]; for (size_type i = 0; i < ranges.get_num_elems() - 1; ++i) { ranges.get_data()[i + 1] = From c4ab56b784e6baeeb14d8c58fa43c7f5d4b28e7c Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 9 Dec 2022 15:14:27 +0100 Subject: [PATCH 194/583] allows specifying part ids for contiguous partition constructor --- .../unified/distributed/partition_kernels.cpp | 9 +++++--- core/distributed/partition.cpp | 11 ++++++++-- core/distributed/partition_kernels.hpp | 9 ++++---- include/ginkgo/core/distributed/partition.hpp | 11 ++++++---- reference/distributed/partition_kernels.cpp | 4 +++- .../test/distributed/partition_kernels.cpp | 22 +++++++++++++++++++ test/distributed/partition_kernels.cpp | 16 ++++++++++++++ 7 files changed, 68 insertions(+), 14 deletions(-) diff --git a/common/unified/distributed/partition_kernels.cpp b/common/unified/distributed/partition_kernels.cpp index cb0f4813da5..dc13fec9f1b 100644 --- a/common/unified/distributed/partition_kernels.cpp +++ b/common/unified/distributed/partition_kernels.cpp @@ -66,19 +66,22 @@ void 
count_ranges(std::shared_ptr exec, template void build_from_contiguous(std::shared_ptr exec, const array& ranges, + const array& part_id_mapping, GlobalIndexType* range_bounds, comm_index_type* part_ids) { run_kernel( exec, - [] GKO_KERNEL(auto i, auto ranges, auto bounds, auto ids) { + [] GKO_KERNEL(auto i, auto ranges, auto mapping, auto bounds, auto ids, + bool uses_mapping) { if (i == 0) { bounds[0] = 0; } bounds[i + 1] = ranges[i + 1]; - ids[i] = i; + ids[i] = uses_mapping ? mapping[i] : i; }, - ranges.get_num_elems() - 1, ranges, range_bounds, part_ids); + ranges.get_num_elems() - 1, ranges, part_id_mapping, range_bounds, + part_ids, part_id_mapping.get_num_elems() > 0); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_CONTIGUOUS); diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index c6e5bfc5fe0..575ca83aba6 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -75,14 +75,21 @@ Partition::build_from_mapping( template std::unique_ptr> Partition::build_from_contiguous( - std::shared_ptr exec, const array& ranges) + std::shared_ptr exec, const array& ranges, + const array& part_ids) { + GKO_ASSERT(part_ids.get_num_elems() == 0 || + part_ids.get_num_elems() + 1 == ranges.get_num_elems()); + + array empty(exec); auto local_ranges = make_temporary_clone(exec, &ranges); + auto local_part_ids = make_temporary_clone( + exec, part_ids.get_num_elems() > 0 ? &part_ids : &empty); auto result = Partition::create( exec, static_cast(ranges.get_num_elems() - 1), ranges.get_num_elems() - 1); exec->run(partition::make_build_from_contiguous( - *local_ranges.get(), result->offsets_.get_data(), + *local_ranges, *local_part_ids, result->offsets_.get_data(), result->part_ids_.get_data())); result->finalize_construction(); return result; diff --git a/core/distributed/partition_kernels.hpp b/core/distributed/partition_kernels.hpp index 3d66ed113e8..070ff0839b4 100644 --- a/core/distributed/partition_kernels.hpp +++ b/core/distributed/partition_kernels.hpp @@ -49,10 +49,11 @@ namespace kernels { const array& mapping, \ size_type& num_ranges) -#define GKO_PARTITION_BUILD_FROM_CONTIGUOUS(GlobalIndexType) \ - void build_from_contiguous(std::shared_ptr exec, \ - const array& ranges, \ - GlobalIndexType* range_bounds, \ +#define GKO_PARTITION_BUILD_FROM_CONTIGUOUS(GlobalIndexType) \ + void build_from_contiguous(std::shared_ptr exec, \ + const array& ranges, \ + const array& part_id_mapping, \ + GlobalIndexType* range_bounds, \ comm_index_type* part_ids) #define GKO_PARTITION_BUILD_FROM_MAPPING(GlobalIndexType) \ diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index 0096edf999c..fa8b2739400 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -260,15 +260,18 @@ class Partition * * @param exec the Executor on which the partition should be built * @param ranges the boundaries of the ranges representing each part. - * Part i contains the indices [ranges[i], ranges[i + 1]). - * Has to contain at least one element. - * The first element has to be 0. + * Part parti_id[i] contains the indices + * [ranges[i], ranges[i + 1]). Has to contain at least + * one element. The first element has to be 0. + * @param part_ids the part ids of the provided ranges. If empty, then + * it will assume range i belongs to part i. * * @return a Partition representing the given contiguous partitioning. 
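+ *          For example (the values used in the reference test below),
+ *          `ranges = {0, 5, 5, 7, 9, 10}` with `part_ids = {0, 4, 3, 1, 2}`
+ *          assigns [0, 5) to part 0, the empty range [5, 5) to part 4,
+ *          [5, 7) to part 3, [7, 9) to part 1, and [9, 10) to part 2.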
*/ static std::unique_ptr build_from_contiguous( std::shared_ptr exec, - const array& ranges); + const array& ranges, + const array& part_ids = {}); /** * Builds a partition by evenly distributing the global range. diff --git a/reference/distributed/partition_kernels.cpp b/reference/distributed/partition_kernels.cpp index 6eae93d27d0..e9a2bfe7667 100644 --- a/reference/distributed/partition_kernels.cpp +++ b/reference/distributed/partition_kernels.cpp @@ -55,14 +55,16 @@ void count_ranges(std::shared_ptr exec, template void build_from_contiguous(std::shared_ptr exec, const array& ranges, + const array& part_id_mapping, GlobalIndexType* range_bounds, comm_index_type* part_ids) { + bool uses_mapping = part_id_mapping.get_num_elems() > 0; range_bounds[0] = 0; for (comm_index_type i = 0; i < ranges.get_num_elems() - 1; i++) { auto end = ranges.get_const_data()[i + 1]; range_bounds[i + 1] = end; - part_ids[i] = i; + part_ids[i] = uses_mapping ? part_id_mapping.get_const_data()[i] : i; } } diff --git a/reference/test/distributed/partition_kernels.cpp b/reference/test/distributed/partition_kernels.cpp index 4cc7750a193..f92349ee2eb 100644 --- a/reference/test/distributed/partition_kernels.cpp +++ b/reference/test/distributed/partition_kernels.cpp @@ -171,6 +171,28 @@ TYPED_TEST(Partition, BuildsFromRangeWithSingleElement) } +TYPED_TEST(Partition, BuildsFromRangesWithPartIds) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + gko::array ranges{this->ref, {0, 5, 5, 7, 9, 10}}; + gko::array part_id{this->ref, {0, 4, 3, 1, 2}}; + + auto partition = + part_type::build_from_contiguous(this->ref, ranges, part_id); + + EXPECT_EQ(partition->get_size(), + ranges.get_data()[ranges.get_num_elems() - 1]); + EXPECT_EQ(partition->get_num_ranges(), ranges.get_num_elems() - 1); + EXPECT_EQ(partition->get_num_parts(), ranges.get_num_elems() - 1); + EXPECT_EQ(partition->get_num_empty_parts(), 1); + assert_equal_data(partition->get_range_bounds(), {0, 5, 5, 7, 9, 10}); + assert_equal_data(partition->get_part_ids(), {0, 4, 3, 1, 2}); + assert_equal_data(partition->get_range_starting_indices(), {0, 0, 0, 0, 0}); + assert_equal_data(partition->get_part_sizes(), {5, 2, 1, 2, 0}); +} + + TYPED_TEST(Partition, BuildsFromGlobalSize) { using part_type = typename TestFixture::part_type; diff --git a/test/distributed/partition_kernels.cpp b/test/distributed/partition_kernels.cpp index 686d1432da5..7033abb37ef 100644 --- a/test/distributed/partition_kernels.cpp +++ b/test/distributed/partition_kernels.cpp @@ -276,6 +276,22 @@ TYPED_TEST(Partition, BuildsFromContiguousWithSingleEntry) } +TYPED_TEST(Partition, BuildsFromContiguousWithPartId) +{ + using global_index_type = typename TestFixture::global_index_type; + using part_type = typename TestFixture::part_type; + gko::array ranges{this->ref, + {0, 1234, 3134, 4578, 16435, 60000}}; + gko::array part_id{this->ref, {0, 4, 3, 1, 2}}; + gko::array dranges{this->exec, ranges}; + + auto part = part_type::build_from_contiguous(this->ref, ranges, part_id); + auto dpart = part_type::build_from_contiguous(this->exec, dranges, part_id); + + this->assert_equal(part, dpart); +} + + TYPED_TEST(Partition, BuildsFromGlobalSize) { using global_index_type = typename TestFixture::global_index_type; From 78a8927f7ff41c7698f6c6d0d52cc4131d55c3b2 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 9 Dec 2022 16:14:57 +0100 Subject: [PATCH 195/583] adds sorting kernel for ranges + part-ids --- 
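A short illustration of the new sorting kernel, using the values from the reference test added in this patch:

    range_start_ends = {7, 4, 0, 9, 9, 7, 4, 11}   // all range starts first, then all range ends
    part_ids         = {0, 1, 2, 3}
    // after sort_by_range_start:
    range_start_ends = {0, 4, 7, 9, 4, 7, 9, 11}
    part_ids         = {2, 1, 0, 3}

The start/end pairs are reordered ascending by their start index, and the part ids follow the same permutation.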
.../distributed/partition_helpers_kernels.cpp | 15 +++ core/device_hooks/common_kernels.inc.cpp | 3 +- .../distributed/partition_helpers_kernels.hpp | 16 ++- .../distributed/partition_helpers_kernels.cpp | 24 ++++ reference/test/distributed/CMakeLists.txt | 1 + .../distributed/partition_helpers_kernels.cpp | 104 ++++++++++++++++++ 6 files changed, 158 insertions(+), 5 deletions(-) create mode 100644 reference/test/distributed/partition_helpers_kernels.cpp diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index d5f4f407cd5..a3b47718d88 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -32,10 +32,25 @@ void compress_start_ends(std::shared_ptr exec, } +template +void check_consecutive_ranges() +{} + + GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS); +template +void sort_by_range_start(std::shared_ptr exec, + array& range_start_ends, + array& + part_ids) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); + + } // namespace partition_helpers } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 519376dae11..304f3ae45f8 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -260,9 +260,10 @@ namespace partition_helpers { GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS); +GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); -} +} // namespace partition_helpers namespace distributed_vector { diff --git a/core/distributed/partition_helpers_kernels.hpp b/core/distributed/partition_helpers_kernels.hpp index 374fedf8c1a..b9c9984e93b 100644 --- a/core/distributed/partition_helpers_kernels.hpp +++ b/core/distributed/partition_helpers_kernels.hpp @@ -17,10 +17,18 @@ namespace kernels { const array<_type>& range_start_ends, \ array<_type>& ranges) - -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS(GlobalIndexType) +#define GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START(_type) \ + void sort_by_range_start( \ + std::shared_ptr exec, \ + array<_type>& range_start_ends, \ + array& part_ids) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS(GlobalIndexType); \ + template \ + GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START(GlobalIndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(partition_helpers, diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 0060d20be10..d4ba757b284 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -1,5 +1,8 @@ #include "core/distributed/partition_helpers_kernels.hpp" +#include + +#include "core/base/iterator_factory.hpp" namespace gko { @@ -27,6 +30,27 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS); +template +void sort_by_range_start( + std::shared_ptr exec, + array& range_start_ends, + array& part_ids) +{ + auto part_ids_d = part_ids.get_data(); + auto num_parts = part_ids.get_num_elems(); + auto range_starts = range_start_ends.get_data(); + auto range_ends = range_starts + num_parts; + auto sort_it = + 
detail::make_zip_iterator(range_starts, range_ends, part_ids_d); + std::sort(sort_it, sort_it + num_parts, [](const auto& a, const auto& b) { + return std::get<0>(a) < std::get<0>(b); + }); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); + + } // namespace partition_helpers } // namespace reference } // namespace kernels diff --git a/reference/test/distributed/CMakeLists.txt b/reference/test/distributed/CMakeLists.txt index 2985c7b5e11..42ad2d7e1a2 100644 --- a/reference/test/distributed/CMakeLists.txt +++ b/reference/test/distributed/CMakeLists.txt @@ -1,3 +1,4 @@ ginkgo_create_test(matrix_kernels) +ginkgo_create_test(partition_helpers_kernels) ginkgo_create_test(partition_kernels) ginkgo_create_test(vector_kernels) diff --git a/reference/test/distributed/partition_helpers_kernels.cpp b/reference/test/distributed/partition_helpers_kernels.cpp new file mode 100644 index 00000000000..688762d2d9a --- /dev/null +++ b/reference/test/distributed/partition_helpers_kernels.cpp @@ -0,0 +1,104 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#include +#include + + +#include + + +#include "core/distributed/partition_helpers_kernels.hpp" +#include "core/test/utils.hpp" + + +namespace { + + +using comm_index_type = gko::experimental::distributed::comm_index_type; + + +template +class PartitionHelpers : public ::testing::Test { +protected: + using global_index_type = GlobalIndexType; + + PartitionHelpers() : ref(gko::ReferenceExecutor::create()) {} + + std::shared_ptr ref; + gko::array default_range_start_ends{ + this->ref, {0, 4, 7, 9, 4, 7, 9, 11}}; + gko::array default_part_ids{this->ref, {0, 1, 2, 3}}; +}; + +TYPED_TEST_SUITE(PartitionHelpers, gko::test::IndexTypes, + TypenameNameGenerator); + + +TYPED_TEST(PartitionHelpers, CanSortByRangeStartIdentity) +{ + using itype = typename TestFixture::global_index_type; + auto range_start_ends = this->default_range_start_ends; + auto part_ids = this->default_part_ids; + + gko::kernels::reference::partition_helpers::sort_by_range_start( + this->ref, range_start_ends, part_ids); + + GKO_ASSERT_ARRAY_EQ(range_start_ends, this->default_range_start_ends); + GKO_ASSERT_ARRAY_EQ(part_ids, this->default_part_ids); +} + + +TYPED_TEST(PartitionHelpers, CanSortByRangeStart) +{ + using global_index_type = typename TestFixture::global_index_type; + gko::array range_start_ends{this->ref, + {7, 4, 0, 9, 9, 7, 4, 11}}; + gko::array result_part_ids{this->ref, {2, 1, 0, 3}}; + auto part_ids = this->default_part_ids; + + gko::kernels::reference::partition_helpers::sort_by_range_start( + this->ref, range_start_ends, part_ids); + + GKO_ASSERT_ARRAY_EQ(range_start_ends, this->default_range_start_ends); + GKO_ASSERT_ARRAY_EQ(part_ids, result_part_ids); +} + +} // namespace From 8d4b61bc8e503bb5ddadfaa3b93f055efc231c30 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 9 Dec 2022 16:56:28 +0100 Subject: [PATCH 196/583] adds consistency check kernel (reference --- .../distributed/partition_helpers_kernels.cpp | 8 ++++++ core/device_hooks/common_kernels.inc.cpp | 1 + .../distributed/partition_helpers_kernels.hpp | 10 ++++++- .../distributed/partition_helpers_kernels.cpp | 25 +++++++++++++++++ .../distributed/partition_helpers_kernels.cpp | 28 +++++++++++++++++++ 5 files changed, 71 insertions(+), 1 deletion(-) diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index a3b47718d88..437d1590a43 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -51,6 +51,14 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); +template +void check_consecutive_ranges(std::shared_ptr exec, + array& range_start_ends, + bool* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES); + } // namespace partition_helpers } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 304f3ae45f8..2e37ae53e20 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -261,6 +261,7 @@ namespace partition_helpers { GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); 
+GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES); } // namespace partition_helpers diff --git a/core/distributed/partition_helpers_kernels.hpp b/core/distributed/partition_helpers_kernels.hpp index b9c9984e93b..08af876c06f 100644 --- a/core/distributed/partition_helpers_kernels.hpp +++ b/core/distributed/partition_helpers_kernels.hpp @@ -24,11 +24,19 @@ namespace kernels { array& part_ids) +#define GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES(_type) \ + void check_consecutive_ranges(std::shared_ptr exec, \ + array<_type>& range_start_ends, \ + bool* result) + + #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS(GlobalIndexType); \ template \ - GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START(GlobalIndexType) + GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START(GlobalIndexType); \ + template \ + GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES(GlobalIndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(partition_helpers, diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index d4ba757b284..882d88509f0 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -51,6 +51,31 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); +template +void check_consecutive_ranges(std::shared_ptr exec, + array& range_start_ends, + bool* result) +{ + auto num_parts = range_start_ends.get_num_elems() / 2; + auto range_starts = range_start_ends.get_data(); + auto range_ends = range_starts + num_parts; + auto combined_it = detail::make_zip_iterator(range_starts + 1, range_ends); + + if (num_parts) { + *result = std::all_of(combined_it, combined_it + (num_parts - 1), + [](const auto& start_end) { + return std::get<0>(start_end) == + std::get<1>(start_end); + }); + } else { + *result = true; + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES); + + } // namespace partition_helpers } // namespace reference } // namespace kernels diff --git a/reference/test/distributed/partition_helpers_kernels.cpp b/reference/test/distributed/partition_helpers_kernels.cpp index 688762d2d9a..1d34a4fd530 100644 --- a/reference/test/distributed/partition_helpers_kernels.cpp +++ b/reference/test/distributed/partition_helpers_kernels.cpp @@ -101,4 +101,32 @@ TYPED_TEST(PartitionHelpers, CanSortByRangeStart) GKO_ASSERT_ARRAY_EQ(part_ids, result_part_ids); } + +TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRanges) +{ + using global_index_type = typename TestFixture::global_index_type; + auto range_start_ends = this->default_range_start_ends; + bool result = false; + + gko::kernels::reference::partition_helpers::check_consecutive_ranges( + this->ref, range_start_ends, &result); + + ASSERT_TRUE(result); +} + + +TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) +{ + using global_index_type = typename TestFixture::global_index_type; + gko::array range_start_ends{this->ref, + {7, 4, 0, 9, 9, 7, 4, 11}}; + bool result = true; + + gko::kernels::reference::partition_helpers::check_consecutive_ranges( + this->ref, range_start_ends, &result); + + ASSERT_FALSE(result); +} + + } // namespace From 55fe5c1c0fb4b1dd578e2af241666f3ca02e46cc Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 9 Dec 2022 17:49:39 +0100 Subject: [PATCH 197/583] wip --- core/distributed/partition_helpers.cpp | 55 +++++++++++++++---- 
include/ginkgo/core/base/mpi.hpp | 12 ++++ .../distributed/partition_helpers_kernels.cpp | 7 +-- test/mpi/partition_helpers.cpp | 22 +++++--- 4 files changed, 73 insertions(+), 23 deletions(-) diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 3a46461bcf0..4fee81f82b9 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/components/fill_array_kernels.hpp" #include "core/distributed/partition_helpers_kernels.hpp" @@ -44,11 +45,14 @@ namespace partition_helpers { namespace { -GKO_REGISTER_OPERATION(compress_start_ends, - partition_helpers::compress_start_ends); +GKO_REGISTER_OPERATION(fill_seq_array, components::fill_seq_array); +GKO_REGISTER_OPERATION(sort_by_range_start, + partition_helpers::sort_by_range_start); +GKO_REGISTER_OPERATION(check_consecutive_ranges, + partition_helpers::check_consecutive_ranges); -} +} // namespace } // namespace partition_helpers @@ -61,18 +65,49 @@ build_partition_from_local_range(std::shared_ptr exec, static_cast(local_range.end)}; // make all range_start_ends available on each rank - auto mpi_exec = (exec == exec->get_master() || mpi::is_gpu_aware()) - ? exec - : exec->get_master(); + auto mpi_exec = exec->get_master(); array ranges_start_end(mpi_exec, comm.size() * 2); - ranges_start_end.fill(0); - comm.all_gather(mpi_exec, range, 2, ranges_start_end.get_data(), 2); + ranges_start_end.fill(invalid_index()); + MPI_Datatype tmp; + MPI_Type_vector(2, 1, comm.size(), + mpi::type_impl::get_type(), &tmp); + MPI_Type_commit(&tmp); + comm.all_gather( + mpi_exec, range, 1, + mpi::contiguous_type(2, mpi::type_impl::get_type()) + .get(), + ranges_start_end.get_data(), 1, tmp); + MPI_Type_free(&tmp); + if (comm.rank() == 0) { + std::cout << ranges_start_end.get_num_elems() << " "; + for (int i = 0; i < comm.size() * 2; ++i) { + std::cout << ranges_start_end.get_data()[i] << " "; + } + std::cout << std::endl; + } + comm.synchronize(); ranges_start_end.set_executor(exec); + // make_sort_by_range_start + array part_ids(exec, comm.size()); + exec->run(partition_helpers::make_fill_seq_array(part_ids.get_data(), + part_ids.get_num_elems())); + exec->run(partition_helpers::make_sort_by_range_start(ranges_start_end, + part_ids)); + + // check for consistency + bool consecutive_ranges = false; + exec->run(partition_helpers::make_check_consecutive_ranges( + ranges_start_end, &consecutive_ranges)); + if (!consecutive_ranges) { + throw Error(__FILE__, __LINE__, "The partition contains gaps."); + } + // remove duplicates array ranges(exec, comm.size() + 1); - exec->run( - partition_helpers::make_compress_start_ends(ranges_start_end, ranges)); + exec->copy(1, ranges_start_end.get_data(), ranges.get_data()); + exec->copy(comm.size(), ranges_start_end.get_data() + comm.size(), + ranges.get_data() + 1); return Partition::build_from_contiguous( exec, ranges); diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index bf985cabeb7..40b38b55781 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -1013,6 +1013,18 @@ class communicator { this->get())); } + + void all_gather(std::shared_ptr exec, + const void* send_buffer, const int send_count, + MPI_Datatype send_type, void* recv_buffer, + const int recv_count, MPI_Datatype recv_type) const + { + auto guard = exec->get_scoped_device_id_guard(); + GKO_ASSERT_NO_MPI_ERRORS( + 
MPI_Allgather(send_buffer, send_count, send_type, recv_buffer, + recv_count, recv_type, this->get())); + } + /** * (Non-blocking) Gather data onto all ranks from all ranks in the * communicator. diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 882d88509f0..0142021d34d 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -17,11 +17,10 @@ void compress_start_ends(std::shared_ptr exec, array& ranges) { if (ranges.get_num_elems() && range_start_ends.get_num_elems()) { + auto num_ranges = ranges.get_num_elems() - 1; ranges.get_data()[0] = range_start_ends.get_const_data()[0]; - for (size_type i = 0; i < ranges.get_num_elems() - 1; ++i) { - ranges.get_data()[i + 1] = - range_start_ends.get_const_data()[2 * i + 1]; - } + std::copy_n(range_start_ends.get_const_data() + num_ranges, num_ranges, + ranges.get_data() + 1); } } diff --git a/test/mpi/partition_helpers.cpp b/test/mpi/partition_helpers.cpp index 303d7362856..6f886922e45 100644 --- a/test/mpi/partition_helpers.cpp +++ b/test/mpi/partition_helpers.cpp @@ -1,28 +1,32 @@ -#include #include +#include #include "core/test/utils.hpp" #include "test/utils/mpi/executor.hpp" -template -class PartitionHelpers : public CommonMpiTestFixture{ +template +class PartitionHelpers : public CommonMpiTestFixture { protected: using index_type = IndexType; - }; TYPED_TEST_SUITE(PartitionHelpers, gko::test::IndexTypes); -TYPED_TEST(PartitionHelpers, CanBuildFromLocalRanges){ - using itype = typename TestFixture::index_type ; +TYPED_TEST(PartitionHelpers, CanBuildFromLocalRanges) +{ + using itype = typename TestFixture::index_type; gko::span local_range[] = {{0u, 4u}, {4u, 9u}, {9u, 11u}}; gko::array expects{this->exec, {0, 4, 9, 11}}; - auto part = gko::experimental::distributed::build_partition_from_local_range(this->exec, local_range[this->comm.rank()], this->comm); + auto part = + gko::experimental::distributed::build_partition_from_local_range< + gko::int32, itype>(this->exec, local_range[this->comm.rank()], + this->comm); - GKO_ASSERT_ARRAY_EQ(expects, - gko::make_const_array_view(this->exec, expects.get_num_elems(), part->get_range_bounds())); + GKO_ASSERT_ARRAY_EQ( + expects, gko::make_const_array_view(this->exec, expects.get_num_elems(), + part->get_range_bounds())); } From 0c7f4ba7d4f04c5c16a019f0b974e25f9fbd7f0f Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 12 Dec 2022 17:41:33 +0100 Subject: [PATCH 198/583] fixes ranges gathering --- core/distributed/partition_helpers.cpp | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 4fee81f82b9..1f380f3631f 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -68,24 +68,12 @@ build_partition_from_local_range(std::shared_ptr exec, auto mpi_exec = exec->get_master(); array ranges_start_end(mpi_exec, comm.size() * 2); ranges_start_end.fill(invalid_index()); - MPI_Datatype tmp; - MPI_Type_vector(2, 1, comm.size(), - mpi::type_impl::get_type(), &tmp); - MPI_Type_commit(&tmp); - comm.all_gather( - mpi_exec, range, 1, - mpi::contiguous_type(2, mpi::type_impl::get_type()) - .get(), - ranges_start_end.get_data(), 1, tmp); - MPI_Type_free(&tmp); - if (comm.rank() == 0) { - std::cout << ranges_start_end.get_num_elems() << " "; - for (int i = 0; i < comm.size() * 2; ++i) { - std::cout << 
ranges_start_end.get_data()[i] << " "; - } - std::cout << std::endl; - } - comm.synchronize(); + std::vector reqs; + reqs.push_back(comm.i_all_gather(mpi_exec, &range[0], 1, + ranges_start_end.get_data(), 1)); + reqs.push_back(comm.i_all_gather( + mpi_exec, &range[1], 1, ranges_start_end.get_data() + comm.size(), 1)); + mpi::wait_all(reqs); ranges_start_end.set_executor(exec); // make_sort_by_range_start @@ -110,7 +98,7 @@ build_partition_from_local_range(std::shared_ptr exec, ranges.get_data() + 1); return Partition::build_from_contiguous( - exec, ranges); + exec, ranges, part_ids); } #define GKO_DECLARE_BUILD_PARTITION_FROM_LOCAL_RANGE(_local_type, \ From 2c448c67cddc72bbbb9cef9bde837d198f5eef07 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 12 Dec 2022 17:41:57 +0100 Subject: [PATCH 199/583] adds MPI tests --- test/mpi/partition_helpers.cpp | 53 ++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/test/mpi/partition_helpers.cpp b/test/mpi/partition_helpers.cpp index 6f886922e45..506991c6e15 100644 --- a/test/mpi/partition_helpers.cpp +++ b/test/mpi/partition_helpers.cpp @@ -6,6 +6,9 @@ #include "test/utils/mpi/executor.hpp" +using comm_index_type = gko::experimental::distributed::comm_index_type; + + template class PartitionHelpers : public CommonMpiTestFixture { protected: @@ -19,7 +22,31 @@ TYPED_TEST(PartitionHelpers, CanBuildFromLocalRanges) { using itype = typename TestFixture::index_type; gko::span local_range[] = {{0u, 4u}, {4u, 9u}, {9u, 11u}}; - gko::array expects{this->exec, {0, 4, 9, 11}}; + gko::array expects_ranges{this->exec, {0, 4, 9, 11}}; + gko::array expects_pid{this->exec, {0, 1, 2}}; + + + auto part = + gko::experimental::distributed::build_partition_from_local_range< + gko::int32, itype>(this->exec, local_range[this->comm.rank()], + this->comm); + + GKO_ASSERT_ARRAY_EQ( + expects_ranges, + gko::make_const_array_view(this->exec, expects_ranges.get_num_elems(), + part->get_range_bounds())); + GKO_ASSERT_ARRAY_EQ( + expects_pid, + gko::make_const_array_view(this->exec, expects_pid.get_num_elems(), + part->get_part_ids())); +} + +TYPED_TEST(PartitionHelpers, CanBuildFromLocalRangesUnsorted) +{ + using itype = typename TestFixture::index_type; + gko::span local_range[] = {{4u, 9u}, {9u, 11u}, {0u, 4u}}; + gko::array expects_ranges{this->exec, {0, 4, 9, 11}}; + gko::array expects_pid{this->exec, {2, 0, 1}}; auto part = gko::experimental::distributed::build_partition_from_local_range< @@ -27,6 +54,26 @@ TYPED_TEST(PartitionHelpers, CanBuildFromLocalRanges) this->comm); GKO_ASSERT_ARRAY_EQ( - expects, gko::make_const_array_view(this->exec, expects.get_num_elems(), - part->get_range_bounds())); + expects_ranges, + gko::make_const_array_view(this->exec, expects_ranges.get_num_elems(), + part->get_range_bounds())); + GKO_ASSERT_ARRAY_EQ( + expects_pid, + gko::make_const_array_view(this->exec, expects_pid.get_num_elems(), + part->get_part_ids())); +} + + +TYPED_TEST(PartitionHelpers, CanBuildFromLocalRangesThrowsOnGap) +{ + using itype = typename TestFixture::index_type; + gko::span local_range[] = {{4u, 6u}, {9u, 11u}, {0u, 4u}}; + auto build_from_local_ranges = [](auto... 
args) { + return gko::experimental::distributed::build_partition_from_local_range< + gko::int32, itype>(args...); + }; + + ASSERT_THROW(build_from_local_ranges( + this->exec, local_range[this->comm.rank()], this->comm), + gko::Error); } From 20c43b5058e078473b06bb1690ae7a520af3bf1a Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 12 Dec 2022 17:43:18 +0100 Subject: [PATCH 200/583] removes dead code --- .../distributed/partition_helpers_kernels.cpp | 30 ------------------- core/device_hooks/common_kernels.inc.cpp | 1 - .../distributed/partition_helpers_kernels.hpp | 7 ----- .../distributed/partition_helpers_kernels.cpp | 18 ----------- 4 files changed, 56 deletions(-) diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index 437d1590a43..6858b58d7ca 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -11,36 +11,6 @@ namespace GKO_DEVICE_NAMESPACE { namespace partition_helpers { -template -void compress_start_ends(std::shared_ptr exec, - const array& range_start_ends, - array& ranges) -{ - run_kernel( - exec, - [] GKO_KERNEL(auto i, auto size, const auto* range_start_ends, - auto* ranges) { - if (i == 0) { - ranges[0] = range_start_ends[0]; - } - if (i != size - 1) { - ranges[i + 1] = range_start_ends[2 * i + 1]; - } - }, - ranges.get_num_elems() - 1, ranges.get_num_elems(), - range_start_ends.get_const_data(), ranges.get_data()); -} - - -template -void check_consecutive_ranges() -{} - - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS); - - template void sort_by_range_start(std::shared_ptr exec, array& range_start_ends, diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 2e37ae53e20..51bcf9d9587 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -259,7 +259,6 @@ GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_PARTITION_IS_ORDERED); namespace partition_helpers { -GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES); diff --git a/core/distributed/partition_helpers_kernels.hpp b/core/distributed/partition_helpers_kernels.hpp index 08af876c06f..db7cd429ef9 100644 --- a/core/distributed/partition_helpers_kernels.hpp +++ b/core/distributed/partition_helpers_kernels.hpp @@ -12,11 +12,6 @@ namespace gko { namespace kernels { -#define GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS(_type) \ - void compress_start_ends(std::shared_ptr exec, \ - const array<_type>& range_start_ends, \ - array<_type>& ranges) - #define GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START(_type) \ void sort_by_range_start( \ std::shared_ptr exec, \ @@ -31,8 +26,6 @@ namespace kernels { #define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS(GlobalIndexType); \ template \ GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START(GlobalIndexType); \ template \ diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 0142021d34d..c8797682ef1 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -11,24 +11,6 @@ namespace reference { namespace partition_helpers { -template -void 
compress_start_ends(std::shared_ptr exec, - const array& range_start_ends, - array& ranges) -{ - if (ranges.get_num_elems() && range_start_ends.get_num_elems()) { - auto num_ranges = ranges.get_num_elems() - 1; - ranges.get_data()[0] = range_start_ends.get_const_data()[0]; - std::copy_n(range_start_ends.get_const_data() + num_ranges, num_ranges, - ranges.get_data() + 1); - } -} - - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_HELPERS_COMPRESS_START_ENDS); - - template void sort_by_range_start( std::shared_ptr exec, From 80d86c03fc7a3ca8bdb6b5f5e156ebce08125042 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 19 Dec 2022 17:14:17 +0100 Subject: [PATCH 201/583] adds device consecutive ranges check --- .../distributed/partition_helpers_kernels.cpp | 20 ++- test/distributed/partition_helper_kernels.cpp | 132 ++++++++++++++---- 2 files changed, 126 insertions(+), 26 deletions(-) diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index 6858b58d7ca..4c1401666b4 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -3,6 +3,7 @@ #include "common/unified/base/kernel_launch.hpp" +#include "common/unified/base/kernel_launch_reduction.hpp" namespace gko { @@ -24,7 +25,24 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void check_consecutive_ranges(std::shared_ptr exec, array& range_start_ends, - bool* result) GKO_NOT_IMPLEMENTED; + bool* result) +{ + array result_uint32{exec, 1}; + auto num_ranges = range_start_ends.get_num_elems() / 2; + run_kernel_reduction( + exec, + [] GKO_KERNEL(const auto i, const auto* starts, const auto* ends) { + return starts[i + 1] == ends[i]; + }, + [] GKO_KERNEL(const auto a, const auto b) { + return static_cast(a && b); + }, + [] GKO_KERNEL(auto x) { return x; }, static_cast(true), + result_uint32.get_data(), num_ranges - 1, range_start_ends.get_data(), + range_start_ends.get_data() + num_ranges); + *result = + static_cast(exec->copy_val_to_host(result_uint32.get_data())); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES); diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index c52ba65e5c7..a1a270cbe14 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -44,7 +44,72 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "test/utils/executor.hpp" -using comm_index_type = gko::experimental::distributed::comm_index_type; +template +std::pair, std::vector> create_ranges( + gko::size_type num_ranges) +{ + std::default_random_engine engine; + std::uniform_int_distribution dist(5, 10); + std::vector range_sizes(num_ranges); + std::generate(range_sizes.begin(), range_sizes.end(), + [&]() { return dist(engine); }); + + std::vector range_offsets(num_ranges + 1, 0); + std::partial_sum(range_sizes.begin(), range_sizes.end(), + range_offsets.begin() + 1); + + std::vector range_starts(num_ranges); + std::vector range_ends(num_ranges); + std::copy_n(range_offsets.begin(), num_ranges, range_starts.begin()); + std::copy_n(range_offsets.begin() + 1, num_ranges, range_ends.begin()); + + return {std::move(range_starts), std::move(range_ends)}; +} + + +std::vector sample_unique(std::size_t min, std::size_t max, + gko::size_type n) +{ + std::default_random_engine engine; + std::vector values(std::clamp(max - min, 0ul, max)); + std::iota(values.begin(), values.end(), min); + + std::shuffle(values.begin(), values.end(), engine); + + values.erase(values.begin() + std::clamp(n, 0ul, values.size()), values.end()); + + return values; +} + + +template +std::vector remove_indices(const std::vector& source, + std::vector idxs) +{ + std::sort(idxs.begin(), idxs.end(), std::greater<>{}); + auto result = source; + for (auto idx : idxs) { + result.erase(result.begin() + idx); + } + return result; +} + + +template +gko::array concat_start_end( + std::shared_ptr exec, + const std::pair, std::vector>& start_ends) +{ + gko::size_type num_ranges = start_ends.first.size(); + gko::array concat(exec, num_ranges * 2); + + exec->copy_from(exec->get_master().get(), num_ranges, + start_ends.first.data(), concat.get_data()); + exec->copy_from(exec->get_master().get(), num_ranges, + start_ends.second.data(), concat.get_data() + num_ranges); + + return concat; +} template @@ -56,43 +121,60 @@ class PartitionHelpers : public CommonTestFixture { TYPED_TEST_SUITE(PartitionHelpers, gko::test::IndexTypes); -TYPED_TEST(PartitionHelpers, CanCompressStartEndsWithOneRange) +TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRanges) { - using itype = typename TestFixture::index_type; - gko::array start_ends{this->exec, {0, 3}}; - gko::array expects{this->exec, {0, 3}}; - gko::array result{this->exec, expects.get_num_elems()}; + using index_type = typename TestFixture::index_type; + auto start_ends = + concat_start_end(this->exec, create_ranges(100)); + bool result = false; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( + this->exec, start_ends, &result); + + ASSERT_TRUE(result); +} - gko::kernels::EXEC_NAMESPACE::partition_helpers::compress_start_ends( - this->exec, start_ends, result); - GKO_ASSERT_ARRAY_EQ(result, expects); +TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) +{ + using index_type = typename TestFixture::index_type; + auto full_range_ends = create_ranges(100); + auto removal_idxs = sample_unique(0, full_range_ends.first.size(), 4); + auto start_ends = concat_start_end( + this->ref, + std::make_pair(remove_indices(full_range_ends.first, removal_idxs), + remove_indices(full_range_ends.second, removal_idxs))); + bool result = true; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( + this->exec, start_ends, &result); + + ASSERT_FALSE(result); } -TYPED_TEST(PartitionHelpers, CanCompressStartEndsWithMultipleRanges) +TYPED_TEST(PartitionHelpers, 
CanCheckConsecutiveRangesWithSingleRange) { - using itype = typename TestFixture::index_type; - gko::array start_ends{this->exec, {0, 3, 3, 7, 7, 10}}; - gko::array expects{this->exec, {0, 3, 7, 10}}; - gko::array result{this->exec, expects.get_num_elems()}; + using index_type = typename TestFixture::index_type; + auto start_ends = concat_start_end( + this->ref,create_ranges(1)); + bool result = false; - gko::kernels::EXEC_NAMESPACE::partition_helpers::compress_start_ends( - this->exec, start_ends, result); + gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( + this->exec, start_ends, &result); - GKO_ASSERT_ARRAY_EQ(result, expects); + ASSERT_TRUE(result); } -TYPED_TEST(PartitionHelpers, CanCompressStartEndsWithZeroRange) +TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleElement) { - using itype = typename TestFixture::index_type; - gko::array start_ends{this->exec}; - gko::array expects{this->exec, {0}}; - gko::array result{this->exec, {0}}; + using index_type = typename TestFixture::index_type; + auto start_ends = gko::array(this->exec, {1}); + bool result = false; - gko::kernels::EXEC_NAMESPACE::partition_helpers::compress_start_ends( - this->exec, start_ends, result); + gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( + this->exec, start_ends, &result); - GKO_ASSERT_ARRAY_EQ(result, expects); + ASSERT_TRUE(result); } From 2a3c35c7e9b9856c011f2cda541e7089a0dcf3c7 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 19 Dec 2022 17:20:49 +0100 Subject: [PATCH 202/583] add omp sorting kernel --- .../distributed/partition_helpers_kernels.cpp | 10 ----- cuda/CMakeLists.txt | 1 + cuda/distributed/partition_helpers_kernels.cu | 24 ++++++++++++ dpcpp/CMakeLists.txt | 1 + .../partition_helpers_kernels.dp.cpp | 24 ++++++++++++ hip/CMakeLists.txt | 1 + .../partition_helpers_kernels.hip.cpp | 24 ++++++++++++ omp/CMakeLists.txt | 1 + omp/distributed/partition_helpers_kernels.cpp | 39 +++++++++++++++++++ 9 files changed, 115 insertions(+), 10 deletions(-) create mode 100644 cuda/distributed/partition_helpers_kernels.cu create mode 100644 dpcpp/distributed/partition_helpers_kernels.dp.cpp create mode 100644 hip/distributed/partition_helpers_kernels.hip.cpp create mode 100644 omp/distributed/partition_helpers_kernels.cpp diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index 4c1401666b4..8b891dd3cb2 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -12,16 +12,6 @@ namespace GKO_DEVICE_NAMESPACE { namespace partition_helpers { -template -void sort_by_range_start(std::shared_ptr exec, - array& range_start_ends, - array& - part_ids) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); - - template void check_consecutive_ranges(std::shared_ptr exec, array& range_start_ends, diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 764f47afb83..4c972d2a584 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -21,6 +21,7 @@ target_sources(ginkgo_cuda base/version.cpp components/prefix_sum_kernels.cu distributed/matrix_kernels.cu + distributed/partition_helpers_kernels.cu distributed/partition_kernels.cu distributed/vector_kernels.cu factorization/cholesky_kernels.cu diff --git a/cuda/distributed/partition_helpers_kernels.cu b/cuda/distributed/partition_helpers_kernels.cu new file mode 100644 index 
00000000000..57729516508 --- /dev/null +++ b/cuda/distributed/partition_helpers_kernels.cu @@ -0,0 +1,24 @@ + +#include "core/distributed/partition_helpers_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +namespace partition_helpers { + + +template +void sort_by_range_start(std::shared_ptr exec, + array& range_start_ends, + array& + part_ids) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); + + +} // namespace partition_helpers +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index b70175c6b12..dd0d7c4cdfb 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -18,6 +18,7 @@ target_sources(ginkgo_dpcpp base/version.dp.cpp components/prefix_sum_kernels.dp.cpp distributed/matrix_kernels.dp.cpp + distributed/partition_helpers_kernels.dp.cpp distributed/partition_kernels.dp.cpp distributed/vector_kernels.dp.cpp factorization/cholesky_kernels.dp.cpp diff --git a/dpcpp/distributed/partition_helpers_kernels.dp.cpp b/dpcpp/distributed/partition_helpers_kernels.dp.cpp new file mode 100644 index 00000000000..e8dbe8444d8 --- /dev/null +++ b/dpcpp/distributed/partition_helpers_kernels.dp.cpp @@ -0,0 +1,24 @@ + +#include "core/distributed/partition_helpers_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +namespace partition_helpers { + + +template +void sort_by_range_start(std::shared_ptr exec, + array& range_start_ends, + array& + part_ids) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); + + +} // namespace partition_helpers +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 5ec1718ca4d..779db13d36a 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -18,6 +18,7 @@ set(GINKGO_HIP_SOURCES base/version.hip.cpp components/prefix_sum_kernels.hip.cpp distributed/matrix_kernels.hip.cpp + distributed/partition_helpers_kernels.hip.cpp distributed/partition_kernels.hip.cpp distributed/vector_kernels.hip.cpp factorization/cholesky_kernels.hip.cpp diff --git a/hip/distributed/partition_helpers_kernels.hip.cpp b/hip/distributed/partition_helpers_kernels.hip.cpp new file mode 100644 index 00000000000..99f3b711794 --- /dev/null +++ b/hip/distributed/partition_helpers_kernels.hip.cpp @@ -0,0 +1,24 @@ + +#include "core/distributed/partition_helpers_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +namespace partition_helpers { + + +template +void sort_by_range_start(std::shared_ptr exec, + array& range_start_ends, + array& + part_ids) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); + + +} // namespace partition_helpers +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 02248983385..c689ffc42f3 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -10,6 +10,7 @@ target_sources(ginkgo_omp base/version.cpp components/prefix_sum_kernels.cpp distributed/matrix_kernels.cpp + distributed/partition_helpers_kernels.cpp distributed/partition_kernels.cpp distributed/vector_kernels.cpp factorization/cholesky_kernels.cpp diff --git a/omp/distributed/partition_helpers_kernels.cpp b/omp/distributed/partition_helpers_kernels.cpp new file mode 100644 index 00000000000..3216782f2ac --- /dev/null +++ 
b/omp/distributed/partition_helpers_kernels.cpp @@ -0,0 +1,39 @@ + +#include "core/distributed/partition_helpers_kernels.hpp" + + +#include "core/base/iterator_factory.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +namespace partition_helpers { + + +template +void sort_by_range_start( + std::shared_ptr exec, + array& range_start_ends, + array& part_ids) +{ + auto part_ids_d = part_ids.get_data(); + auto num_parts = part_ids.get_num_elems(); + auto range_starts = range_start_ends.get_data(); + auto range_ends = range_starts + num_parts; + auto sort_it = + detail::make_zip_iterator(range_starts, range_ends, part_ids_d); + // TODO: use TBB or parallel std with c++17 + std::sort(sort_it, sort_it + num_parts, [](const auto& a, const auto& b) { + return std::get<0>(a) < std::get<0>(b); + }); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); + + +} // namespace partition_helpers +} // namespace omp +} // namespace kernels +} // namespace gko From 0474cbfcf64e5fbfa5a3c4a9adc89631badbc893 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 20 Dec 2022 09:23:39 +0100 Subject: [PATCH 203/583] fixes tests --- test/distributed/partition_helper_kernels.cpp | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index a1a270cbe14..eeefdd415a7 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -44,6 +44,25 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "test/utils/executor.hpp" +using gko::experimental::distributed::comm_index_type; + + +// TODO: remove with c++17 +template +T clamp(const T&v, const T& lo, const T& hi){ + return v < lo ? lo : (v > hi ? 
hi : v); +} + + +template +std::vector create_iota(IndexType min, IndexType max) +{ + std::vector iota(clamp(max - min, 0ul, max)); + std::iota(iota.begin(), iota.end(), min); + return iota; +} + + template std::pair, std::vector> create_ranges( gko::size_type num_ranges) @@ -71,13 +90,9 @@ std::vector sample_unique(std::size_t min, std::size_t max, gko::size_type n) { std::default_random_engine engine; - std::vector values(std::clamp(max - min, 0ul, max)); - std::iota(values.begin(), values.end(), min); - + auto values = create_iota(min, max); std::shuffle(values.begin(), values.end(), engine); - - values.erase(values.begin() + std::clamp(n, 0ul, values.size()), values.end()); - + values.erase(values.begin() + clamp(n, 0ul, values.size()), values.end()); return values; } @@ -141,7 +156,7 @@ TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) auto full_range_ends = create_ranges(100); auto removal_idxs = sample_unique(0, full_range_ends.first.size(), 4); auto start_ends = concat_start_end( - this->ref, + this->exec, std::make_pair(remove_indices(full_range_ends.first, removal_idxs), remove_indices(full_range_ends.second, removal_idxs))); bool result = true; @@ -170,7 +185,7 @@ TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleRange) TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleElement) { using index_type = typename TestFixture::index_type; - auto start_ends = gko::array(this->exec, {1}); + auto start_ends = gko::array(this->exec, {1}); bool result = false; gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( From 3272d89cb1e88abeca981654ef3feab87ec29551 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 20 Dec 2022 11:11:42 +0100 Subject: [PATCH 204/583] adds cuda/hip/dpcpp sort kernels --- .../partition_helpers_kernels.hpp.inc | 48 +++++++++++ .../distributed/partition_helpers_kernels.cpp | 32 ++++++++ .../distributed/partition_helpers_kernels.hpp | 32 ++++++++ cuda/distributed/partition_helpers_kernels.cu | 47 +++++++++-- .../partition_helpers_kernels.dp.cpp | 57 ++++++++++++- .../partition_helpers_kernels.hip.cpp | 46 +++++++++-- omp/distributed/partition_helpers_kernels.cpp | 32 ++++++++ .../distributed/partition_helpers_kernels.cpp | 32 ++++++++ test/distributed/partition_helper_kernels.cpp | 80 +++++++++++++++++-- test/mpi/partition_helpers.cpp | 32 ++++++++ 10 files changed, 410 insertions(+), 28 deletions(-) create mode 100644 common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc diff --git a/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc b/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc new file mode 100644 index 00000000000..ca12b9a2bd1 --- /dev/null +++ b/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc @@ -0,0 +1,48 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +template +void sort_by_range_start( + std::shared_ptr exec, + array& range_start_ends, + array& part_ids) +{ + auto num_ranges = range_start_ends.get_num_elems() / 2; + auto starts = thrust::device_pointer_cast(range_start_ends.get_data()); + auto ends = starts + num_ranges; + auto zip_it = thrust::make_zip_iterator(thrust::make_tuple( + ends, thrust::device_pointer_cast(part_ids.get_data()))); + thrust::sort_by_key(thrust::device, starts, starts + num_ranges, zip_it); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index 8b891dd3cb2..e7e37bfd9db 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -1,3 +1,35 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + #include "core/distributed/partition_helpers_kernels.hpp" diff --git a/core/distributed/partition_helpers_kernels.hpp b/core/distributed/partition_helpers_kernels.hpp index db7cd429ef9..b3bacf694f4 100644 --- a/core/distributed/partition_helpers_kernels.hpp +++ b/core/distributed/partition_helpers_kernels.hpp @@ -1,3 +1,35 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + #ifndef GINKGO_PARTITION_HELPERS_KERNELS_HPP #define GINKGO_PARTITION_HELPERS_KERNELS_HPP diff --git a/cuda/distributed/partition_helpers_kernels.cu b/cuda/distributed/partition_helpers_kernels.cu index 57729516508..2687517ad7f 100644 --- a/cuda/distributed/partition_helpers_kernels.cu +++ b/cuda/distributed/partition_helpers_kernels.cu @@ -1,21 +1,52 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + #include "core/distributed/partition_helpers_kernels.hpp" +#include +#include +#include +#include + + namespace gko { namespace kernels { namespace cuda { namespace partition_helpers { -template -void sort_by_range_start(std::shared_ptr exec, - array& range_start_ends, - array& - part_ids) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); +#include "common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc" } // namespace partition_helpers diff --git a/dpcpp/distributed/partition_helpers_kernels.dp.cpp b/dpcpp/distributed/partition_helpers_kernels.dp.cpp index e8dbe8444d8..8aae72cd636 100644 --- a/dpcpp/distributed/partition_helpers_kernels.dp.cpp +++ b/dpcpp/distributed/partition_helpers_kernels.dp.cpp @@ -1,3 +1,41 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +// force-top: on +#include +#include +#include +// force-top: off + #include "core/distributed/partition_helpers_kernels.hpp" @@ -9,10 +47,21 @@ namespace partition_helpers { template -void sort_by_range_start(std::shared_ptr exec, - array& range_start_ends, - array& - part_ids) GKO_NOT_IMPLEMENTED; +void sort_by_range_start( + std::shared_ptr exec, + array& range_start_ends, + array& part_ids) +{ + auto policy = + oneapi::dpl::execution::make_device_policy(*exec->get_queue()); + auto num_ranges = range_start_ends.get_num_elems() / 2; + auto starts = range_start_ends.get_data(); + auto ends = starts + num_ranges; + auto zip_it = + oneapi::dpl::make_zip_iterator(starts, ends, part_ids.get_data()); + std::sort(policy, zip_it, zip_it + num_ranges, + [](auto a, auto b) { return std::get<0>(a) < std::get<0>(b); }); +} GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); diff --git a/hip/distributed/partition_helpers_kernels.hip.cpp b/hip/distributed/partition_helpers_kernels.hip.cpp index 99f3b711794..d6239650979 100644 --- a/hip/distributed/partition_helpers_kernels.hip.cpp +++ b/hip/distributed/partition_helpers_kernels.hip.cpp @@ -1,21 +1,51 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ #include "core/distributed/partition_helpers_kernels.hpp" +#include +#include +#include +#include + + namespace gko { namespace kernels { namespace hip { namespace partition_helpers { -template -void sort_by_range_start(std::shared_ptr exec, - array& range_start_ends, - array& - part_ids) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( - GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); +#include "common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc" } // namespace partition_helpers diff --git a/omp/distributed/partition_helpers_kernels.cpp b/omp/distributed/partition_helpers_kernels.cpp index 3216782f2ac..9e42e8cc888 100644 --- a/omp/distributed/partition_helpers_kernels.cpp +++ b/omp/distributed/partition_helpers_kernels.cpp @@ -1,3 +1,35 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + #include "core/distributed/partition_helpers_kernels.hpp" diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index c8797682ef1..1319c5a3951 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -1,3 +1,35 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + #include "core/distributed/partition_helpers_kernels.hpp" #include diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index eeefdd415a7..64fd1e49b77 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -40,16 +40,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/base/iterator_factory.hpp" #include "core/test/utils.hpp" #include "test/utils/executor.hpp" using gko::experimental::distributed::comm_index_type; +template +using range_container = + std::pair, std::vector>; + // TODO: remove with c++17 -template -T clamp(const T&v, const T& lo, const T& hi){ +template +T clamp(const T& v, const T& lo, const T& hi) +{ return v < lo ? lo : (v > hi ? 
hi : v); } @@ -57,15 +63,15 @@ T clamp(const T&v, const T& lo, const T& hi){ template std::vector create_iota(IndexType min, IndexType max) { - std::vector iota(clamp(max - min, 0ul, max)); + std::vector iota( + clamp(max - min, static_cast(0), max)); std::iota(iota.begin(), iota.end(), min); return iota; } template -std::pair, std::vector> create_ranges( - gko::size_type num_ranges) +range_container create_ranges(gko::size_type num_ranges) { std::default_random_engine engine; std::uniform_int_distribution dist(5, 10); @@ -113,7 +119,7 @@ std::vector remove_indices(const std::vector& source, template gko::array concat_start_end( std::shared_ptr exec, - const std::pair, std::vector>& start_ends) + const range_container& start_ends) { gko::size_type num_ranges = start_ends.first.size(); gko::array concat(exec, num_ranges * 2); @@ -127,6 +133,25 @@ gko::array concat_start_end( } +template +std::pair, std::vector> +shuffle_range_and_pid(const range_container& ranges, + const std::vector& pid) +{ + std::default_random_engine engine; + + auto result = std::make_pair(ranges, pid); + + auto num_ranges = result.second.size(); + auto zip_it = gko::detail::make_zip_iterator( + result.first.first.begin(), + result.first.second.begin(), + result.second.begin()); + std::shuffle(zip_it, zip_it + num_ranges, engine); + + return result; +} + template class PartitionHelpers : public CommonTestFixture { protected: @@ -171,8 +196,7 @@ TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleRange) { using index_type = typename TestFixture::index_type; - auto start_ends = concat_start_end( - this->ref,create_ranges(1)); + auto start_ends = concat_start_end(this->ref, create_ranges(1)); bool result = false; gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( @@ -193,3 +217,43 @@ TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleElement) ASSERT_TRUE(result); } + + +TYPED_TEST(PartitionHelpers, CanSortConsecutiveRanges) +{ + using index_type = typename TestFixture::index_type; + auto start_ends = + concat_start_end(this->exec, create_ranges(100)); + auto part_ids = create_iota(0, 100); + auto part_ids_arr = gko::array( + this->exec, part_ids.begin(), part_ids.end()); + auto expected_start_ends = start_ends; + auto expected_part_ids = part_ids_arr; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::sort_by_range_start( + this->exec, start_ends, part_ids_arr); + + GKO_ASSERT_ARRAY_EQ(expected_start_ends, start_ends); + GKO_ASSERT_ARRAY_EQ(expected_part_ids, part_ids_arr); +} + + +TYPED_TEST(PartitionHelpers, CanSortNonConsecutiveRanges) +{ + using index_type = typename TestFixture::index_type; + auto ranges = create_ranges(100); + auto part_ids = create_iota(0, 100); + auto shuffled = shuffle_range_and_pid(ranges, part_ids); + auto expected_start_ends = concat_start_end(this->exec, ranges); + auto expected_part_ids = gko::array( + this->exec, part_ids.begin(), part_ids.end()); + auto start_ends = concat_start_end(this->exec, shuffled.first); + auto part_ids_arr = gko::array( + this->exec, shuffled.second.begin(), shuffled.second.end()); + + gko::kernels::EXEC_NAMESPACE::partition_helpers::sort_by_range_start( + this->exec, start_ends, part_ids_arr); + + GKO_ASSERT_ARRAY_EQ(expected_start_ends, start_ends); + GKO_ASSERT_ARRAY_EQ(expected_part_ids, part_ids_arr); +} diff --git a/test/mpi/partition_helpers.cpp b/test/mpi/partition_helpers.cpp index 506991c6e15..ef56b4d927b 100644 --- 
a/test/mpi/partition_helpers.cpp +++ b/test/mpi/partition_helpers.cpp @@ -1,3 +1,35 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + #include #include From 6d0d3d52ef160c48b7b31bd48d1500a027dfe3c3 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 20 Dec 2022 11:22:10 +0100 Subject: [PATCH 205/583] adds creator from local sizes --- core/distributed/partition_helpers.cpp | 51 +++++++++++++++++-- .../core/distributed/partition_helpers.hpp | 23 ++++++++- test/mpi/partition_helpers.cpp | 36 ++++++++++--- 3 files changed, 97 insertions(+), 13 deletions(-) diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 1f380f3631f..5e1a8b7fa7d 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -32,20 +32,33 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
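// build_partition_from_local_size (defined below) gathers every rank's local
// size with comm.all_gather, prefix-sums the gathered sizes on the host into
// global offsets, and passes those offsets to
// Partition::build_from_contiguous. A minimal usage sketch, assuming an
// executor `exec`, a communicator `comm`, and a per-rank `local_size`
// already exist:
//
//   auto part = gko::experimental::distributed::
//       build_partition_from_local_size<gko::int32, gko::int64>(
//           exec, comm, local_size);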
#include #include +#include #include "core/components/fill_array_kernels.hpp" +#include "core/components/prefix_sum_kernels.hpp" #include "core/distributed/partition_helpers_kernels.hpp" namespace gko { namespace experimental { namespace distributed { -namespace partition_helpers { +namespace components { namespace { GKO_REGISTER_OPERATION(fill_seq_array, components::fill_seq_array); +GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum); + + +} // namespace +} // namespace components + + +namespace partition_helpers { +namespace { + + GKO_REGISTER_OPERATION(sort_by_range_start, partition_helpers::sort_by_range_start); GKO_REGISTER_OPERATION(check_consecutive_ranges, @@ -59,7 +72,7 @@ GKO_REGISTER_OPERATION(check_consecutive_ranges, template std::unique_ptr> build_partition_from_local_range(std::shared_ptr exec, - span local_range, mpi::communicator comm) + mpi::communicator comm, span local_range) { GlobalIndexType range[2] = {static_cast(local_range.begin), static_cast(local_range.end)}; @@ -78,8 +91,8 @@ build_partition_from_local_range(std::shared_ptr exec, // make_sort_by_range_start array part_ids(exec, comm.size()); - exec->run(partition_helpers::make_fill_seq_array(part_ids.get_data(), - part_ids.get_num_elems())); + exec->run(components::make_fill_seq_array(part_ids.get_data(), + part_ids.get_num_elems())); exec->run(partition_helpers::make_sort_by_range_start(ranges_start_end, part_ids)); @@ -105,11 +118,39 @@ build_partition_from_local_range(std::shared_ptr exec, _global_type) \ std::unique_ptr> \ build_partition_from_local_range(std::shared_ptr exec, \ - span local_range, mpi::communicator comm) + mpi::communicator comm, span local_range) GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( GKO_DECLARE_BUILD_PARTITION_FROM_LOCAL_RANGE); +template +std::unique_ptr> +build_partition_from_local_size(std::shared_ptr exec, + mpi::communicator comm, size_type local_size) +{ + auto local_size_gi = static_cast(local_size); + std::vector sizes(comm.size()); + comm.all_gather(exec, &local_size_gi, 1, sizes.data(), 1); + + std::vector offsets(comm.size() + 1); + offsets[0] = 0; + std::partial_sum(sizes.begin(), sizes.end(), offsets.begin() + 1); + + auto ranges = + make_array_view(exec->get_master(), offsets.size(), offsets.data()); + return Partition::build_from_contiguous( + exec, ranges); +} + +#define GKO_DECLARE_BUILD_PARTITION_FROM_LOCAL_SIZE(_local_type, _global_type) \ + std::unique_ptr> \ + build_partition_from_local_size(std::shared_ptr exec, \ + mpi::communicator comm, \ + size_type local_range) +GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE( + GKO_DECLARE_BUILD_PARTITION_FROM_LOCAL_SIZE); + + } // namespace distributed } // namespace experimental } // namespace gko diff --git a/include/ginkgo/core/distributed/partition_helpers.hpp b/include/ginkgo/core/distributed/partition_helpers.hpp index 93b04af7f6c..8364759567e 100644 --- a/include/ginkgo/core/distributed/partition_helpers.hpp +++ b/include/ginkgo/core/distributed/partition_helpers.hpp @@ -56,7 +56,8 @@ class Partition; * Builds a partition from a local range. * * @param exec the Executor on which the partition should be built. - * @param local_range the start and end indices of the local range + * @param local_range the start and end indices of the local range. + * @param comm the communicator used to determine the global partition. * * @warning The local ranges have to be continuous and ascending. 
This means * that for a process `i` with `range[i] = [s_i, e_i)` then for process @@ -68,7 +69,25 @@ class Partition; template std::unique_ptr> build_partition_from_local_range(std::shared_ptr exec, - span local_range, mpi::communicator comm); + mpi::communicator comm, span local_range); + + +/** + * Builds a partition from a local size. + * + * @param exec the Executor on which the partition should be built. + * @param local_range the number of the locally owned indices + * @param comm the communicator used to determine the global partition. + * + * @return a Partition where each range has the specified local size. More + * specifically, if this is called on process i with local_size `s_i`, + * then the range `i` has size `s_i`, and range `r_i = [start, start + + * s_i)`, where `start = sum_j^(i-1) s_j`. + */ +template +std::unique_ptr> +build_partition_from_local_size(std::shared_ptr exec, + mpi::communicator comm, size_type local_size); } // namespace distributed diff --git a/test/mpi/partition_helpers.cpp b/test/mpi/partition_helpers.cpp index ef56b4d927b..72e01da9931 100644 --- a/test/mpi/partition_helpers.cpp +++ b/test/mpi/partition_helpers.cpp @@ -60,8 +60,8 @@ TYPED_TEST(PartitionHelpers, CanBuildFromLocalRanges) auto part = gko::experimental::distributed::build_partition_from_local_range< - gko::int32, itype>(this->exec, local_range[this->comm.rank()], - this->comm); + gko::int32, itype>(this->exec, this->comm, + local_range[this->comm.rank()]); GKO_ASSERT_ARRAY_EQ( expects_ranges, @@ -73,6 +73,7 @@ TYPED_TEST(PartitionHelpers, CanBuildFromLocalRanges) part->get_part_ids())); } + TYPED_TEST(PartitionHelpers, CanBuildFromLocalRangesUnsorted) { using itype = typename TestFixture::index_type; @@ -82,8 +83,8 @@ TYPED_TEST(PartitionHelpers, CanBuildFromLocalRangesUnsorted) auto part = gko::experimental::distributed::build_partition_from_local_range< - gko::int32, itype>(this->exec, local_range[this->comm.rank()], - this->comm); + gko::int32, itype>(this->exec, this->comm, + local_range[this->comm.rank()]); GKO_ASSERT_ARRAY_EQ( expects_ranges, @@ -100,12 +101,35 @@ TYPED_TEST(PartitionHelpers, CanBuildFromLocalRangesThrowsOnGap) { using itype = typename TestFixture::index_type; gko::span local_range[] = {{4u, 6u}, {9u, 11u}, {0u, 4u}}; + // Hack because of multiple template arguments in macro auto build_from_local_ranges = [](auto... 
args) { return gko::experimental::distributed::build_partition_from_local_range< gko::int32, itype>(args...); }; - ASSERT_THROW(build_from_local_ranges( - this->exec, local_range[this->comm.rank()], this->comm), + ASSERT_THROW(build_from_local_ranges(this->exec, this->comm, + local_range[this->comm.rank()]), gko::Error); } + + +TYPED_TEST(PartitionHelpers, CanBuildFromLocalSize) +{ + using itype = typename TestFixture::index_type; + gko::size_type local_range[] = {4, 5, 3}; + gko::array expects_ranges{this->exec, {0, 4, 9, 12}}; + gko::array expects_pid{this->exec, {0, 1, 2}}; + + auto part = gko::experimental::distributed::build_partition_from_local_size< + gko::int32, itype>(this->exec, this->comm, + local_range[this->comm.rank()]); + + GKO_ASSERT_ARRAY_EQ( + expects_ranges, + gko::make_const_array_view(this->exec, expects_ranges.get_num_elems(), + part->get_range_bounds())); + GKO_ASSERT_ARRAY_EQ( + expects_pid, + gko::make_const_array_view(this->exec, expects_pid.get_num_elems(), + part->get_part_ids())); +} From 4213d363b6fdd8632cd7827c794caf9fea72d958 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 20 Dec 2022 11:58:29 +0100 Subject: [PATCH 206/583] review updates Co-authored-by: Tobias Ribizel --- core/distributed/partition_helpers.cpp | 6 +++--- test/mpi/partition_helpers.cpp | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 5e1a8b7fa7d..d1bc28a6fdd 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -48,7 +48,6 @@ namespace { GKO_REGISTER_OPERATION(fill_seq_array, components::fill_seq_array); -GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum); } // namespace @@ -74,8 +73,9 @@ std::unique_ptr> build_partition_from_local_range(std::shared_ptr exec, mpi::communicator comm, span local_range) { - GlobalIndexType range[2] = {static_cast(local_range.begin), - static_cast(local_range.end)}; + std::array range{ + static_cast(local_range.begin), + static_cast(local_range.end)}; // make all range_start_ends available on each rank auto mpi_exec = exec->get_master(); diff --git a/test/mpi/partition_helpers.cpp b/test/mpi/partition_helpers.cpp index 72e01da9931..dc9c63d28dd 100644 --- a/test/mpi/partition_helpers.cpp +++ b/test/mpi/partition_helpers.cpp @@ -47,7 +47,8 @@ class PartitionHelpers : public CommonMpiTestFixture { using index_type = IndexType; }; -TYPED_TEST_SUITE(PartitionHelpers, gko::test::IndexTypes); +TYPED_TEST_SUITE(PartitionHelpers, gko::test::IndexTypes, + TypenameNameGenerator); TYPED_TEST(PartitionHelpers, CanBuildFromLocalRanges) From 662a8e53734980a4b2fb20b6fd64a8d7ede4c1b3 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 12 Jan 2023 09:07:47 +0100 Subject: [PATCH 207/583] bump copyright --- common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc | 2 +- common/unified/distributed/partition_helpers_kernels.cpp | 2 +- core/distributed/partition_helpers.cpp | 2 +- core/distributed/partition_helpers_kernels.hpp | 2 +- cuda/distributed/partition_helpers_kernels.cu | 2 +- dpcpp/distributed/partition_helpers_kernels.dp.cpp | 2 +- hip/distributed/partition_helpers_kernels.hip.cpp | 2 +- include/ginkgo/core/distributed/partition_helpers.hpp | 2 +- omp/distributed/partition_helpers_kernels.cpp | 2 +- reference/distributed/partition_helpers_kernels.cpp | 2 +- reference/test/distributed/partition_helpers_kernels.cpp | 2 +- test/distributed/partition_helper_kernels.cpp | 2 +- test/mpi/partition_helpers.cpp 
| 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc b/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc index ca12b9a2bd1..17ac375c056 100644 --- a/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc +++ b/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index e7e37bfd9db..a40bda31de4 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index d1bc28a6fdd..fb9a1cea233 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/core/distributed/partition_helpers_kernels.hpp b/core/distributed/partition_helpers_kernels.hpp index b3bacf694f4..22a946bfb8f 100644 --- a/core/distributed/partition_helpers_kernels.hpp +++ b/core/distributed/partition_helpers_kernels.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/cuda/distributed/partition_helpers_kernels.cu b/cuda/distributed/partition_helpers_kernels.cu index 2687517ad7f..e37655e357e 100644 --- a/cuda/distributed/partition_helpers_kernels.cu +++ b/cuda/distributed/partition_helpers_kernels.cu @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/dpcpp/distributed/partition_helpers_kernels.dp.cpp b/dpcpp/distributed/partition_helpers_kernels.dp.cpp index 8aae72cd636..797b7b5e081 100644 --- a/dpcpp/distributed/partition_helpers_kernels.dp.cpp +++ b/dpcpp/distributed/partition_helpers_kernels.dp.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/hip/distributed/partition_helpers_kernels.hip.cpp b/hip/distributed/partition_helpers_kernels.hip.cpp index d6239650979..d9ae663f93f 100644 --- a/hip/distributed/partition_helpers_kernels.hip.cpp +++ b/hip/distributed/partition_helpers_kernels.hip.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/include/ginkgo/core/distributed/partition_helpers.hpp b/include/ginkgo/core/distributed/partition_helpers.hpp index 8364759567e..d9b2fee3d14 100644 --- a/include/ginkgo/core/distributed/partition_helpers.hpp +++ b/include/ginkgo/core/distributed/partition_helpers.hpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/omp/distributed/partition_helpers_kernels.cpp b/omp/distributed/partition_helpers_kernels.cpp index 9e42e8cc888..03a46d93f3b 100644 --- a/omp/distributed/partition_helpers_kernels.cpp +++ b/omp/distributed/partition_helpers_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 1319c5a3951..bff3e26a997 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/reference/test/distributed/partition_helpers_kernels.cpp b/reference/test/distributed/partition_helpers_kernels.cpp index 1d34a4fd530..abaab32903b 100644 --- a/reference/test/distributed/partition_helpers_kernels.cpp +++ b/reference/test/distributed/partition_helpers_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index 64fd1e49b77..441da3b8bd4 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/test/mpi/partition_helpers.cpp b/test/mpi/partition_helpers.cpp index dc9c63d28dd..6f30761cbb0 100644 --- a/test/mpi/partition_helpers.cpp +++ b/test/mpi/partition_helpers.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2022, the Ginkgo authors +Copyright (c) 2017-2023, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without From 309459ea42acabfb24a231266c1f5ca9c6bfb257 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 12 Jan 2023 09:19:19 +0100 Subject: [PATCH 208/583] review updates: - documentation - make partition checks const - test fixes Co-authored-by: Gregor Olenik --- core/distributed/partition.cpp | 4 ++-- include/ginkgo/core/distributed/partition.hpp | 4 ++-- .../ginkgo/core/distributed/partition_helpers.hpp | 12 +++++++----- test/distributed/partition_helper_kernels.cpp | 4 ++-- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index 575ca83aba6..22f0fdb3d94 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -123,7 +123,7 @@ void Partition::finalize_construction() template -bool Partition::has_connected_parts() +bool Partition::has_connected_parts() const { return this->get_num_parts() - this->get_num_empty_parts() == this->get_num_ranges(); @@ -131,7 +131,7 @@ bool Partition::has_connected_parts() template -bool Partition::has_ordered_parts() +bool Partition::has_ordered_parts() const { if (this->has_connected_parts()) { auto exec = this->get_executor(); diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index fa8b2739400..a40f30f7137 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -231,7 +231,7 @@ class Partition * * @return true if each part has no more than one contiguous range. */ - bool has_connected_parts(); + bool has_connected_parts() const; /** * Checks if the ranges are ordered by their part index. @@ -240,7 +240,7 @@ class Partition * * @return true if the ranges are ordered by their part index. */ - bool has_ordered_parts(); + bool has_ordered_parts() const; /** * Builds a partition from a given mapping global_index -> part_id. diff --git a/include/ginkgo/core/distributed/partition_helpers.hpp b/include/ginkgo/core/distributed/partition_helpers.hpp index d9b2fee3d14..889347674c8 100644 --- a/include/ginkgo/core/distributed/partition_helpers.hpp +++ b/include/ginkgo/core/distributed/partition_helpers.hpp @@ -56,12 +56,14 @@ class Partition; * Builds a partition from a local range. * * @param exec the Executor on which the partition should be built. - * @param local_range the start and end indices of the local range. * @param comm the communicator used to determine the global partition. + * @param local_range the start and end indices of the local range. * - * @warning The local ranges have to be continuous and ascending. This means - * that for a process `i` with `range[i] = [s_i, e_i)` then for process - * `j = i+1` `range[j] = [s_j = e_i, e_j)`. + * @warning This throws, if the resulting partition would contain gaps. 
+ * That means that for a partition of size `n` every local range `r_i + * = [s_i, e_i)` either `s_i != 0` and another local range `r_j = + * [s_j, e_j = s_i)` exists, or `e_i != n` and another local range + * `r_j = [s_j = e_i, e_j)` exists. * * @return a Partition where each range has the individual local_start * and local_ends. @@ -76,8 +78,8 @@ build_partition_from_local_range(std::shared_ptr exec, * Builds a partition from a local size. * * @param exec the Executor on which the partition should be built. - * @param local_range the number of the locally owned indices * @param comm the communicator used to determine the global partition. + * @param local_range the number of the locally owned indices * * @return a Partition where each range has the specified local size. More * specifically, if this is called on process i with local_size `s_i`, diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index 441da3b8bd4..3cc472cd3b6 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -64,7 +64,7 @@ template std::vector create_iota(IndexType min, IndexType max) { std::vector iota( - clamp(max - min, static_cast(0), max)); + clamp(max - min, IndexType(0), max)); std::iota(iota.begin(), iota.end(), min); return iota; } @@ -98,7 +98,7 @@ std::vector sample_unique(std::size_t min, std::size_t max, std::default_random_engine engine; auto values = create_iota(min, max); std::shuffle(values.begin(), values.end(), engine); - values.erase(values.begin() + clamp(n, 0ul, values.size()), values.end()); + values.erase(values.begin() + clamp(n, gko::size_type(0), values.size()), values.end()); return values; } From 71fde250ef4415b310a4cb999563a936bde3556b Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 27 Jan 2023 09:53:40 +0100 Subject: [PATCH 209/583] safeguard against negative reduction size --- common/unified/distributed/partition_helpers_kernels.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index a40bda31de4..5ecff0516c4 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -50,7 +50,8 @@ void check_consecutive_ranges(std::shared_ptr exec, bool* result) { array result_uint32{exec, 1}; - auto num_ranges = range_start_ends.get_num_elems() / 2; + auto num_ranges = std::max(range_start_ends.get_num_elems() / 2, + static_cast(1)); run_kernel_reduction( exec, [] GKO_KERNEL(const auto i, const auto* starts, const auto* ends) { From 6fa9d0460f23bbe4cc756a5ad579861e257e7f02 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 27 Jan 2023 09:55:21 +0100 Subject: [PATCH 210/583] remove unused function --- include/ginkgo/core/base/mpi.hpp | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 40b38b55781..9699dea4942 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -87,10 +87,13 @@ inline constexpr bool is_gpu_aware() int map_rank_to_device_id(MPI_Comm comm, int num_devices); -#define GKO_REGISTER_MPI_TYPE(input_type, mpi_type) \ - template <> \ - struct type_impl { \ - static MPI_Datatype get_type() { return mpi_type; } \ +#define GKO_REGISTER_MPI_TYPE(input_type, mpi_type) \ + template <> \ + struct type_impl { \ + static 
MPI_Datatype get_type() \ + { \ + return mpi_type; \ + } \ } /** @@ -1013,18 +1016,6 @@ class communicator { this->get())); } - - void all_gather(std::shared_ptr exec, - const void* send_buffer, const int send_count, - MPI_Datatype send_type, void* recv_buffer, - const int recv_count, MPI_Datatype recv_type) const - { - auto guard = exec->get_scoped_device_id_guard(); - GKO_ASSERT_NO_MPI_ERRORS( - MPI_Allgather(send_buffer, send_count, send_type, recv_buffer, - recv_count, recv_type, this->get())); - } - /** * (Non-blocking) Gather data onto all ranks from all ranks in the * communicator. From 2c0472f03e348ac34705acdeb4044ce74457abfe Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 27 Jan 2023 09:55:52 +0100 Subject: [PATCH 211/583] fixes partition documentation Co-authored-by: Pratik Nayak --- include/ginkgo/core/distributed/partition.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index a40f30f7137..c336470b923 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -83,11 +83,11 @@ namespace distributed { * ``` * starting_index[0] = 0, * starting_index[1] = 0, - * starting_index[2] = 3, // second range of part 1 + * starting_index[2] = 3, // second range of part 0 * starting_index[3] = 0, - * starting_index[4] = 5, // third range of part 1 + * starting_index[4] = 5, // third range of part 0 * ``` - * which you can use to iterate only over the the second range of part 1 (the + * which you can use to iterate only over the the second range of part 0 (the * third global range) with * ``` * for(int i = 0; i < r[3] - r[2]; ++i){ From 3cfe397d34bacb577a7ced0c7aa6215aae9c09e9 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 9 Feb 2023 17:16:22 +0100 Subject: [PATCH 212/583] changes layout of gather ranges --- core/distributed/partition_helpers.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index fb9a1cea233..deda7f4299d 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -78,16 +78,9 @@ build_partition_from_local_range(std::shared_ptr exec, static_cast(local_range.end)}; // make all range_start_ends available on each rank - auto mpi_exec = exec->get_master(); - array ranges_start_end(mpi_exec, comm.size() * 2); + array ranges_start_end(exec, comm.size() * 2); ranges_start_end.fill(invalid_index()); - std::vector reqs; - reqs.push_back(comm.i_all_gather(mpi_exec, &range[0], 1, - ranges_start_end.get_data(), 1)); - reqs.push_back(comm.i_all_gather( - mpi_exec, &range[1], 1, ranges_start_end.get_data() + comm.size(), 1)); - mpi::wait_all(reqs); - ranges_start_end.set_executor(exec); + comm.all_gather(exec, range.data(), 2, ranges_start_end.get_data(), 2); // make_sort_by_range_start array part_ids(exec, comm.size()); From f4c88f645d4706ee5af3e0b853ac263e1df8a228 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 9 Feb 2023 17:16:36 +0100 Subject: [PATCH 213/583] adapts sorting to changed ranges layout --- .../partition_helpers_kernels.hpp.inc | 16 +- .../partition_helpers_kernels.dp.cpp | 8 +- omp/distributed/partition_helpers_kernels.cpp | 12 +- .../distributed/partition_helpers_kernels.cpp | 12 +- .../distributed/partition_helpers_kernels.cpp | 4 +- test/distributed/partition_helper_kernels.cpp | 170 +++++++++--------- 6 files changed, 115 
insertions(+), 107 deletions(-) diff --git a/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc b/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc index 17ac375c056..54d9d142df2 100644 --- a/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc +++ b/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc @@ -37,11 +37,17 @@ void sort_by_range_start( array& part_ids) { auto num_ranges = range_start_ends.get_num_elems() / 2; - auto starts = thrust::device_pointer_cast(range_start_ends.get_data()); - auto ends = starts + num_ranges; - auto zip_it = thrust::make_zip_iterator(thrust::make_tuple( - ends, thrust::device_pointer_cast(part_ids.get_data()))); - thrust::sort_by_key(thrust::device, starts, starts + num_ranges, zip_it); + auto strided_indices = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [] __host__ __device__(const int i) { return 2 * i; }); + auto start_it = thrust::make_permutation_iterator( + range_start_ends.get_data(), strided_indices); + auto end_it = thrust::make_permutation_iterator( + range_start_ends.get_data() + 1, strided_indices); + auto zip_it = thrust::make_zip_iterator( + thrust::make_tuple(end_it, part_ids.get_data())); + thrust::sort_by_key(thrust::device, start_it, start_it + num_ranges, + zip_it); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/dpcpp/distributed/partition_helpers_kernels.dp.cpp b/dpcpp/distributed/partition_helpers_kernels.dp.cpp index 797b7b5e081..3c4d437a750 100644 --- a/dpcpp/distributed/partition_helpers_kernels.dp.cpp +++ b/dpcpp/distributed/partition_helpers_kernels.dp.cpp @@ -55,10 +55,12 @@ void sort_by_range_start( auto policy = oneapi::dpl::execution::make_device_policy(*exec->get_queue()); auto num_ranges = range_start_ends.get_num_elems() / 2; - auto starts = range_start_ends.get_data(); - auto ends = starts + num_ranges; + auto start_it = oneapi::dpl::make_permutation_iterator( + range_start_ends.get_data(), [](auto i) { return 2 * i; }); + auto end_it = oneapi::dpl::make_permutation_iterator( + range_start_ends.get_data(), [](auto i) { return 2 * i + 1; }); auto zip_it = - oneapi::dpl::make_zip_iterator(starts, ends, part_ids.get_data()); + oneapi::dpl::make_zip_iterator(start_it, end_it, part_ids.get_data()); std::sort(policy, zip_it, zip_it + num_ranges, [](auto a, auto b) { return std::get<0>(a) < std::get<0>(b); }); } diff --git a/omp/distributed/partition_helpers_kernels.cpp b/omp/distributed/partition_helpers_kernels.cpp index 03a46d93f3b..093e8f1ff51 100644 --- a/omp/distributed/partition_helpers_kernels.cpp +++ b/omp/distributed/partition_helpers_kernels.cpp @@ -49,15 +49,17 @@ void sort_by_range_start( array& range_start_ends, array& part_ids) { + struct range { + GlobalIndexType idxs[2]; + }; + auto part_ids_d = part_ids.get_data(); auto num_parts = part_ids.get_num_elems(); - auto range_starts = range_start_ends.get_data(); - auto range_ends = range_starts + num_parts; - auto sort_it = - detail::make_zip_iterator(range_starts, range_ends, part_ids_d); + auto range_it = reinterpret_cast(range_start_ends.get_data()); + auto sort_it = detail::make_zip_iterator(range_it, part_ids_d); // TODO: use TBB or parallel std with c++17 std::sort(sort_it, sort_it + num_parts, [](const auto& a, const auto& b) { - return std::get<0>(a) < std::get<0>(b); + return std::get<0>(a).idxs[0] < std::get<0>(b).idxs[0]; }); } diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index bff3e26a997..35eca44e49f 100644 
--- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -49,14 +49,16 @@ void sort_by_range_start( array& range_start_ends, array& part_ids) { + struct range { + GlobalIndexType idxs[2]; + }; + auto part_ids_d = part_ids.get_data(); auto num_parts = part_ids.get_num_elems(); - auto range_starts = range_start_ends.get_data(); - auto range_ends = range_starts + num_parts; - auto sort_it = - detail::make_zip_iterator(range_starts, range_ends, part_ids_d); + auto range_it = reinterpret_cast(range_start_ends.get_data()); + auto sort_it = detail::make_zip_iterator(range_it, part_ids_d); std::sort(sort_it, sort_it + num_parts, [](const auto& a, const auto& b) { - return std::get<0>(a) < std::get<0>(b); + return std::get<0>(a).idxs[0] < std::get<0>(b).idxs[0]; }); } diff --git a/reference/test/distributed/partition_helpers_kernels.cpp b/reference/test/distributed/partition_helpers_kernels.cpp index abaab32903b..a5fe119d14a 100644 --- a/reference/test/distributed/partition_helpers_kernels.cpp +++ b/reference/test/distributed/partition_helpers_kernels.cpp @@ -64,7 +64,7 @@ class PartitionHelpers : public ::testing::Test { std::shared_ptr ref; gko::array default_range_start_ends{ - this->ref, {0, 4, 7, 9, 4, 7, 9, 11}}; + this->ref, {0, 4, 4, 7, 7, 9, 9, 11}}; gko::array default_part_ids{this->ref, {0, 1, 2, 3}}; }; @@ -90,7 +90,7 @@ TYPED_TEST(PartitionHelpers, CanSortByRangeStart) { using global_index_type = typename TestFixture::global_index_type; gko::array range_start_ends{this->ref, - {7, 4, 0, 9, 9, 7, 4, 11}}; + {7, 9, 4, 7, 0, 4, 9, 11}}; gko::array result_part_ids{this->ref, {2, 1, 0, 3}}; auto part_ids = this->default_part_ids; diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index 3cc472cd3b6..c50975c4615 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -63,16 +63,19 @@ T clamp(const T& v, const T& lo, const T& hi) template std::vector create_iota(IndexType min, IndexType max) { - std::vector iota( - clamp(max - min, IndexType(0), max)); + std::vector iota(clamp(max - min, IndexType(0), max)); std::iota(iota.begin(), iota.end(), min); return iota; } template -range_container create_ranges(gko::size_type num_ranges) +std::vector create_ranges(gko::size_type num_ranges) { + struct repeated_value { + repeated_value(IndexType i) : vals{i, i} {} + IndexType vals[2]; + }; std::default_random_engine engine; std::uniform_int_distribution dist(5, 10); std::vector range_sizes(num_ranges); @@ -83,12 +86,11 @@ range_container create_ranges(gko::size_type num_ranges) std::partial_sum(range_sizes.begin(), range_sizes.end(), range_offsets.begin() + 1); - std::vector range_starts(num_ranges); - std::vector range_ends(num_ranges); - std::copy_n(range_offsets.begin(), num_ranges, range_starts.begin()); - std::copy_n(range_offsets.begin() + 1, num_ranges, range_ends.begin()); - - return {std::move(range_starts), std::move(range_ends)}; + std::vector ranges(num_ranges * 2, 0); + auto ranges_it = reinterpret_cast(ranges.data() + 1); + std::copy(range_offsets.begin() + 1, range_offsets.end() - 1, ranges_it); + ranges.back() = range_offsets.back(); + return ranges; } @@ -98,7 +100,8 @@ std::vector sample_unique(std::size_t min, std::size_t max, std::default_random_engine engine; auto values = create_iota(min, max); std::shuffle(values.begin(), values.end(), engine); - values.erase(values.begin() + clamp(n, gko::size_type(0), 
values.size()), values.end()); + values.erase(values.begin() + clamp(n, gko::size_type(0), values.size()), + values.end()); return values; } @@ -117,36 +120,29 @@ std::vector remove_indices(const std::vector& source, template -gko::array concat_start_end( - std::shared_ptr exec, - const range_container& start_ends) +gko::array make_array(std::shared_ptr exec, + const std::vector& v) { - gko::size_type num_ranges = start_ends.first.size(); - gko::array concat(exec, num_ranges * 2); - - exec->copy_from(exec->get_master().get(), num_ranges, - start_ends.first.data(), concat.get_data()); - exec->copy_from(exec->get_master().get(), num_ranges, - start_ends.second.data(), concat.get_data() + num_ranges); - - return concat; + return gko::array(exec, v.begin(), v.end()); } template -std::pair, std::vector> -shuffle_range_and_pid(const range_container& ranges, +std::pair, std::vector> +shuffle_range_and_pid(const std::vector& ranges, const std::vector& pid) { + struct range { + IndexType vals[2]; + }; + std::default_random_engine engine; auto result = std::make_pair(ranges, pid); auto num_ranges = result.second.size(); auto zip_it = gko::detail::make_zip_iterator( - result.first.first.begin(), - result.first.second.begin(), - result.second.begin()); + reinterpret_cast(result.first.data()), result.second.begin()); std::shuffle(zip_it, zip_it + num_ranges, engine); return result; @@ -161,69 +157,69 @@ class PartitionHelpers : public CommonTestFixture { TYPED_TEST_SUITE(PartitionHelpers, gko::test::IndexTypes); -TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRanges) -{ - using index_type = typename TestFixture::index_type; - auto start_ends = - concat_start_end(this->exec, create_ranges(100)); - bool result = false; - - gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( - this->exec, start_ends, &result); - - ASSERT_TRUE(result); -} - - -TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) -{ - using index_type = typename TestFixture::index_type; - auto full_range_ends = create_ranges(100); - auto removal_idxs = sample_unique(0, full_range_ends.first.size(), 4); - auto start_ends = concat_start_end( - this->exec, - std::make_pair(remove_indices(full_range_ends.first, removal_idxs), - remove_indices(full_range_ends.second, removal_idxs))); - bool result = true; - - gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( - this->exec, start_ends, &result); - - ASSERT_FALSE(result); -} - - -TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleRange) -{ - using index_type = typename TestFixture::index_type; - auto start_ends = concat_start_end(this->ref, create_ranges(1)); - bool result = false; - - gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( - this->exec, start_ends, &result); - - ASSERT_TRUE(result); -} - - -TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleElement) -{ - using index_type = typename TestFixture::index_type; - auto start_ends = gko::array(this->exec, {1}); - bool result = false; - - gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( - this->exec, start_ends, &result); - - ASSERT_TRUE(result); -} +// TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRanges) +//{ +// using index_type = typename TestFixture::index_type; +// auto offsets = +// make_array(this->exec, create_ranges(100)); +// bool result = false; +// +// gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( +// this->exec, offsets, &result); +// +// ASSERT_TRUE(result); +//} +// 
+// +// TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) +//{ +// using index_type = typename TestFixture::index_type; +// auto full_range_ends = create_ranges(100); +// auto removal_idxs = sample_unique(0, full_range_ends.size(), 4); +// auto start_ends = make_array( +// this->exec, +// std::make_pair(remove_indices(full_range_ends.first, removal_idxs), +// remove_indices(full_range_ends.second, removal_idxs))); +// bool result = true; +// +// gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( +// this->exec, start_ends, &result); +// +// ASSERT_FALSE(result); +//} +// +// +// TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleRange) +//{ +// using index_type = typename TestFixture::index_type; +// auto start_ends = +// make_array(this->ref, create_ranges(1)); +// bool result = false; +// +// gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( +// this->exec, start_ends, &result); +// +// ASSERT_TRUE(result); +//} +// +// +// TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleElement) +//{ +// using index_type = typename TestFixture::index_type; +// auto start_ends = gko::array(this->exec, {1}); +// bool result = false; +// +// gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( +// this->exec, start_ends, &result); +// +// ASSERT_TRUE(result); +//} TYPED_TEST(PartitionHelpers, CanSortConsecutiveRanges) { using index_type = typename TestFixture::index_type; - auto start_ends = - concat_start_end(this->exec, create_ranges(100)); + auto start_ends = make_array(this->exec, create_ranges(100)); auto part_ids = create_iota(0, 100); auto part_ids_arr = gko::array( this->exec, part_ids.begin(), part_ids.end()); @@ -244,10 +240,10 @@ TYPED_TEST(PartitionHelpers, CanSortNonConsecutiveRanges) auto ranges = create_ranges(100); auto part_ids = create_iota(0, 100); auto shuffled = shuffle_range_and_pid(ranges, part_ids); - auto expected_start_ends = concat_start_end(this->exec, ranges); + auto expected_start_ends = make_array(this->exec, ranges); auto expected_part_ids = gko::array( this->exec, part_ids.begin(), part_ids.end()); - auto start_ends = concat_start_end(this->exec, shuffled.first); + auto start_ends = make_array(this->exec, shuffled.first); auto part_ids_arr = gko::array( this->exec, shuffled.second.begin(), shuffled.second.end()); From bb5f7ae638e02dec05f682a1f118fcc907a381eb Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 10 Feb 2023 09:45:01 +0100 Subject: [PATCH 214/583] adapts consecutive check to changed ranges layout --- .../distributed/partition_helpers_kernels.cpp | 35 +++--- .../distributed/partition_helpers_kernels.cpp | 18 +-- .../distributed/partition_helpers_kernels.cpp | 2 +- test/distributed/partition_helper_kernels.cpp | 113 +++++++++--------- 4 files changed, 86 insertions(+), 82 deletions(-) diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index 5ecff0516c4..795be471175 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -50,21 +50,26 @@ void check_consecutive_ranges(std::shared_ptr exec, bool* result) { array result_uint32{exec, 1}; - auto num_ranges = std::max(range_start_ends.get_num_elems() / 2, - static_cast(1)); - run_kernel_reduction( - exec, - [] GKO_KERNEL(const auto i, const auto* starts, const auto* ends) { - return starts[i + 1] == ends[i]; - }, - [] GKO_KERNEL(const auto a, const auto b) 
{ - return static_cast(a && b); - }, - [] GKO_KERNEL(auto x) { return x; }, static_cast(true), - result_uint32.get_data(), num_ranges - 1, range_start_ends.get_data(), - range_start_ends.get_data() + num_ranges); - *result = - static_cast(exec->copy_val_to_host(result_uint32.get_data())); + auto num_ranges = range_start_ends.get_num_elems() / 2; + // need additional guard because DPCPP doesn't return the initial value for + // empty inputs + if (num_ranges > 1) { + run_kernel_reduction( + exec, + [] GKO_KERNEL(const auto i, const auto* ranges) { + return ranges[2 * i] == ranges[2 * i + 1]; + }, + [] GKO_KERNEL(const auto a, const auto b) { + return static_cast(a && b); + }, + [] GKO_KERNEL(auto x) { return x; }, static_cast(true), + result_uint32.get_data(), num_ranges - 1, + range_start_ends.get_data() + 1); + *result = + static_cast(exec->copy_val_to_host(result_uint32.get_data())); + } else { + *result = true; + } } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 35eca44e49f..741f676df05 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -71,17 +71,19 @@ void check_consecutive_ranges(std::shared_ptr exec, array& range_start_ends, bool* result) { + struct end_start { + GlobalIndexType end; + GlobalIndexType start; + }; + auto num_parts = range_start_ends.get_num_elems() / 2; - auto range_starts = range_start_ends.get_data(); - auto range_ends = range_starts + num_parts; - auto combined_it = detail::make_zip_iterator(range_starts + 1, range_ends); + auto range_it = + reinterpret_cast(range_start_ends.get_data() + 1); if (num_parts) { - *result = std::all_of(combined_it, combined_it + (num_parts - 1), - [](const auto& start_end) { - return std::get<0>(start_end) == - std::get<1>(start_end); - }); + *result = + std::all_of(range_it, range_it + num_parts - 1, + [](const end_start& r) { return r.end == r.start; }); } else { *result = true; } diff --git a/reference/test/distributed/partition_helpers_kernels.cpp b/reference/test/distributed/partition_helpers_kernels.cpp index a5fe119d14a..5617883f30a 100644 --- a/reference/test/distributed/partition_helpers_kernels.cpp +++ b/reference/test/distributed/partition_helpers_kernels.cpp @@ -119,7 +119,7 @@ TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) { using global_index_type = typename TestFixture::global_index_type; gko::array range_start_ends{this->ref, - {7, 4, 0, 9, 9, 7, 4, 11}}; + {7, 9, 4, 7, 0, 4, 9, 11}}; bool result = true; gko::kernels::reference::partition_helpers::check_consecutive_ranges( diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index c50975c4615..53310e76b58 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -113,7 +113,7 @@ std::vector remove_indices(const std::vector& source, std::sort(idxs.begin(), idxs.end(), std::greater<>{}); auto result = source; for (auto idx : idxs) { - result.erase(result.begin() + idx); + result.erase(result.begin() + 2 * idx, result.begin() + 2 * idx + 1); } return result; } @@ -157,63 +157,60 @@ class PartitionHelpers : public CommonTestFixture { TYPED_TEST_SUITE(PartitionHelpers, gko::test::IndexTypes); -// TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRanges) -//{ -// using index_type = typename TestFixture::index_type; -// auto offsets = -// make_array(this->exec, 
create_ranges(100)); -// bool result = false; -// -// gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( -// this->exec, offsets, &result); -// -// ASSERT_TRUE(result); -//} -// -// -// TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) -//{ -// using index_type = typename TestFixture::index_type; -// auto full_range_ends = create_ranges(100); -// auto removal_idxs = sample_unique(0, full_range_ends.size(), 4); -// auto start_ends = make_array( -// this->exec, -// std::make_pair(remove_indices(full_range_ends.first, removal_idxs), -// remove_indices(full_range_ends.second, removal_idxs))); -// bool result = true; -// -// gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( -// this->exec, start_ends, &result); -// -// ASSERT_FALSE(result); -//} -// -// -// TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleRange) -//{ -// using index_type = typename TestFixture::index_type; -// auto start_ends = -// make_array(this->ref, create_ranges(1)); -// bool result = false; -// -// gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( -// this->exec, start_ends, &result); -// -// ASSERT_TRUE(result); -//} -// -// -// TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleElement) -//{ -// using index_type = typename TestFixture::index_type; -// auto start_ends = gko::array(this->exec, {1}); -// bool result = false; -// -// gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( -// this->exec, start_ends, &result); -// -// ASSERT_TRUE(result); -//} +TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRanges) +{ + using index_type = typename TestFixture::index_type; + auto offsets = make_array(this->exec, create_ranges(100)); + bool result = false; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( + this->exec, offsets, &result); + + ASSERT_TRUE(result); +} + + +TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) +{ + using index_type = typename TestFixture::index_type; + auto full_range_ends = create_ranges(100); + auto removal_idxs = sample_unique(0, full_range_ends.size() / 2, 4); + auto start_ends = + make_array(this->exec, remove_indices(full_range_ends, removal_idxs)); + bool result = true; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( + this->exec, start_ends, &result); + + ASSERT_FALSE(result); +} + + + TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleRange) +{ + using index_type = typename TestFixture::index_type; + auto start_ends = + make_array(this->ref, create_ranges(1)); + bool result = false; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( + this->exec, start_ends, &result); + + ASSERT_TRUE(result); +} + + + TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleElement) +{ + using index_type = typename TestFixture::index_type; + auto start_ends = gko::array(this->exec, {1}); + bool result = false; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( + this->exec, start_ends, &result); + + ASSERT_TRUE(result); +} TYPED_TEST(PartitionHelpers, CanSortConsecutiveRanges) From 030c75ba18edc6bb178459b182b7fca0d88dbd08 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 10 Feb 2023 10:20:45 +0100 Subject: [PATCH 215/583] adds kernel to compress ranges --- .../distributed/partition_helpers_kernels.cpp | 22 +++++++++ core/device_hooks/common_kernels.inc.cpp | 1 + core/distributed/partition_helpers.cpp | 6 +-- 
.../distributed/partition_helpers_kernels.hpp | 18 +++++--- .../distributed/partition_helpers_kernels.cpp | 16 +++++++ .../distributed/partition_helpers_kernels.cpp | 15 +++++++ test/distributed/partition_helper_kernels.cpp | 45 +++++++++++++++---- 7 files changed, 106 insertions(+), 17 deletions(-) diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index 795be471175..cfae171844f 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -75,6 +75,28 @@ void check_consecutive_ranges(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES); + +template +void compress_ranges(std::shared_ptr exec, + const array& range_start_ends, + array& range_offsets) +{ + run_kernel( + exec, + [] GKO_KERNEL(const auto i, const auto* start_ends, auto* offsets) { + if (i == 0) { + offsets[0] = start_ends[0]; + } + offsets[i + 1] = start_ends[2 * i + 1]; + }, + range_offsets.get_num_elems() - 1, range_start_ends.get_const_data(), + range_offsets.get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_COMPRESS_RANGES); + + } // namespace partition_helpers } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 51bcf9d9587..c8bbd2e0a31 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -261,6 +261,7 @@ namespace partition_helpers { GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES); +GKO_STUB_INDEX_TYPE(GKO_DECLARE_PARTITION_HELPERS_COMPRESS_RANGES); } // namespace partition_helpers diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index deda7f4299d..b57c3e5be53 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -62,6 +62,7 @@ GKO_REGISTER_OPERATION(sort_by_range_start, partition_helpers::sort_by_range_start); GKO_REGISTER_OPERATION(check_consecutive_ranges, partition_helpers::check_consecutive_ranges); +GKO_REGISTER_OPERATION(compress_ranges, partition_helpers::compress_ranges); } // namespace @@ -99,9 +100,8 @@ build_partition_from_local_range(std::shared_ptr exec, // remove duplicates array ranges(exec, comm.size() + 1); - exec->copy(1, ranges_start_end.get_data(), ranges.get_data()); - exec->copy(comm.size(), ranges_start_end.get_data() + comm.size(), - ranges.get_data() + 1); + exec->run( + partition_helpers::make_compress_ranges(ranges_start_end, ranges)); return Partition::build_from_contiguous( exec, ranges, part_ids); diff --git a/core/distributed/partition_helpers_kernels.hpp b/core/distributed/partition_helpers_kernels.hpp index 22a946bfb8f..80e22699b43 100644 --- a/core/distributed/partition_helpers_kernels.hpp +++ b/core/distributed/partition_helpers_kernels.hpp @@ -57,11 +57,19 @@ namespace kernels { bool* result) -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START(GlobalIndexType); \ - template \ - GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES(GlobalIndexType) +#define GKO_DECLARE_PARTITION_HELPERS_COMPRESS_RANGES(_type) \ + void compress_ranges(std::shared_ptr exec, \ + const array<_type>& range_start_ends, \ + array<_type>& range_offsets) + + +#define 
GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_PARTITION_HELPERS_SORT_BY_RANGE_START(GlobalIndexType); \ + template \ + GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES(GlobalIndexType); \ + template \ + GKO_DECLARE_PARTITION_HELPERS_COMPRESS_RANGES(GlobalIndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(partition_helpers, diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 741f676df05..08e1c5a49c2 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -93,6 +93,22 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES); +template +void compress_ranges(std::shared_ptr exec, + const array& range_start_ends, + array& range_offsets) +{ + range_offsets.get_data()[0] = range_start_ends.get_const_data()[0]; + for (int i = 0; i < range_offsets.get_num_elems() - 1; ++i) { + range_offsets.get_data()[i + 1] = + range_start_ends.get_const_data()[2 * i + 1]; + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( + GKO_DECLARE_PARTITION_HELPERS_COMPRESS_RANGES); + + } // namespace partition_helpers } // namespace reference } // namespace kernels diff --git a/reference/test/distributed/partition_helpers_kernels.cpp b/reference/test/distributed/partition_helpers_kernels.cpp index 5617883f30a..9b339fd926f 100644 --- a/reference/test/distributed/partition_helpers_kernels.cpp +++ b/reference/test/distributed/partition_helpers_kernels.cpp @@ -129,4 +129,19 @@ TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) } +TYPED_TEST(PartitionHelpers, CanCompressRanges) +{ + using itype = typename TestFixture::global_index_type; + auto range_start_ends = this->default_range_start_ends; + gko::array range_offsets{this->ref, + range_start_ends.get_num_elems() / 2 + 1}; + gko::array expected_range_offsets{this->ref, {0, 4, 7, 9, 11}}; + + gko::kernels::reference::partition_helpers::compress_ranges( + this->ref, range_start_ends, range_offsets); + + GKO_ASSERT_ARRAY_EQ(range_offsets, expected_range_offsets); +} + + } // namespace diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index 53310e76b58..fdfeb553ae1 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -47,10 +47,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
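// --- Editorial sketch, not part of the patch above ---
// A minimal host-side illustration of what the new compress_ranges kernel
// computes: collapsing the interleaved [s_0, e_0, s_1, e_1, ...] pairs into
// the offsets array [s_0, e_0, e_1, ...], mirroring the logic of the
// reference kernel shown earlier. The helper name compress_ranges_host and
// the use of std::vector/long are assumptions for the sketch only; the
// sample values follow the reference test (CanCompressRanges).
#include <cassert>
#include <vector>

std::vector<long> compress_ranges_host(const std::vector<long>& start_ends)
{
    std::vector<long> offsets(start_ends.size() / 2 + 1);
    // first offset is the start of the first range,
    // every further offset is the end of the preceding range
    offsets[0] = start_ends[0];
    for (std::size_t i = 0; i + 1 < offsets.size(); ++i) {
        offsets[i + 1] = start_ends[2 * i + 1];
    }
    return offsets;
}

int main()
{
    // four consecutive ranges [0,4), [4,7), [7,9), [9,11)
    std::vector<long> start_ends{0, 4, 4, 7, 7, 9, 9, 11};
    assert((compress_ranges_host(start_ends) ==
            std::vector<long>{0, 4, 7, 9, 11}));
}
// --- end of editorial sketch ---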
using gko::experimental::distributed::comm_index_type; -template -using range_container = - std::pair, std::vector>; - // TODO: remove with c++17 template @@ -70,12 +66,8 @@ std::vector create_iota(IndexType min, IndexType max) template -std::vector create_ranges(gko::size_type num_ranges) +std::vector create_range_offsets(gko::size_type num_ranges) { - struct repeated_value { - repeated_value(IndexType i) : vals{i, i} {} - IndexType vals[2]; - }; std::default_random_engine engine; std::uniform_int_distribution dist(5, 10); std::vector range_sizes(num_ranges); @@ -85,7 +77,19 @@ std::vector create_ranges(gko::size_type num_ranges) std::vector range_offsets(num_ranges + 1, 0); std::partial_sum(range_sizes.begin(), range_sizes.end(), range_offsets.begin() + 1); + return range_offsets; +} + +template +std::vector create_ranges( + const std::vector& range_offsets) +{ + struct repeated_value { + repeated_value(IndexType i) : vals{i, i} {} + IndexType vals[2]; + }; + gko::size_type num_ranges = range_offsets.size() - 1; std::vector ranges(num_ranges * 2, 0); auto ranges_it = reinterpret_cast(ranges.data() + 1); std::copy(range_offsets.begin() + 1, range_offsets.end() - 1, ranges_it); @@ -94,6 +98,15 @@ std::vector create_ranges(gko::size_type num_ranges) } +template +std::vector create_ranges(gko::size_type num_ranges) +{ + auto range_offsets = create_range_offsets(num_ranges); + + return create_ranges(range_offsets); +} + + std::vector sample_unique(std::size_t min, std::size_t max, gko::size_type n) { @@ -250,3 +263,17 @@ TYPED_TEST(PartitionHelpers, CanSortNonConsecutiveRanges) GKO_ASSERT_ARRAY_EQ(expected_start_ends, start_ends); GKO_ASSERT_ARRAY_EQ(expected_part_ids, part_ids_arr); } + + +TYPED_TEST(PartitionHelpers, CanCompressRanges) +{ + using index_type = typename TestFixture::index_type; + auto expected_offsets = create_range_offsets(100); + auto ranges = make_array(this->exec, create_ranges(expected_offsets)); + gko::array offsets{this->exec, expected_offsets.size()}; + + gko::kernels::EXEC_NAMESPACE::partition_helpers::compress_ranges( + this->exec, ranges, offsets); + + GKO_ASSERT_ARRAY_EQ(offsets, make_array(this->exec, expected_offsets)); +} From 6f1e90e2a7f4836dea02fc74b7a1ef6d59f0dd92 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 10 Feb 2023 10:29:47 +0100 Subject: [PATCH 216/583] review updates: - constness - documentation Co-authored-by: Tobias Ribizel --- common/unified/distributed/partition_helpers_kernels.cpp | 4 ++-- core/distributed/partition_helpers.cpp | 2 +- core/distributed/partition_helpers_kernels.hpp | 2 +- include/ginkgo/core/distributed/partition.hpp | 2 +- reference/distributed/partition_helpers_kernels.cpp | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index cfae171844f..e5565819021 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -46,7 +46,7 @@ namespace partition_helpers { template void check_consecutive_ranges(std::shared_ptr exec, - array& range_start_ends, + const array& range_start_ends, bool* result) { array result_uint32{exec, 1}; @@ -64,7 +64,7 @@ void check_consecutive_ranges(std::shared_ptr exec, }, [] GKO_KERNEL(auto x) { return x; }, static_cast(true), result_uint32.get_data(), num_ranges - 1, - range_start_ends.get_data() + 1); + range_start_ends.get_const_data() + 1); *result = 
static_cast(exec->copy_val_to_host(result_uint32.get_data())); } else { diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index b57c3e5be53..acc4d535519 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -98,7 +98,7 @@ build_partition_from_local_range(std::shared_ptr exec, throw Error(__FILE__, __LINE__, "The partition contains gaps."); } - // remove duplicates + // join (now consecutive) starts and ends into combined array array ranges(exec, comm.size() + 1); exec->run( partition_helpers::make_compress_ranges(ranges_start_end, ranges)); diff --git a/core/distributed/partition_helpers_kernels.hpp b/core/distributed/partition_helpers_kernels.hpp index 80e22699b43..6d55926db76 100644 --- a/core/distributed/partition_helpers_kernels.hpp +++ b/core/distributed/partition_helpers_kernels.hpp @@ -53,7 +53,7 @@ namespace kernels { #define GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES(_type) \ void check_consecutive_ranges(std::shared_ptr exec, \ - array<_type>& range_start_ends, \ + const array<_type>& range_start_ends, \ bool* result) diff --git a/include/ginkgo/core/distributed/partition.hpp b/include/ginkgo/core/distributed/partition.hpp index c336470b923..bb36528a4a8 100644 --- a/include/ginkgo/core/distributed/partition.hpp +++ b/include/ginkgo/core/distributed/partition.hpp @@ -260,7 +260,7 @@ class Partition * * @param exec the Executor on which the partition should be built * @param ranges the boundaries of the ranges representing each part. - * Part parti_id[i] contains the indices + * Part part_id[i] contains the indices * [ranges[i], ranges[i + 1]). Has to contain at least * one element. The first element has to be 0. * @param part_ids the part ids of the provided ranges. 
If empty, then diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 08e1c5a49c2..989ac1cddb0 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -68,7 +68,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void check_consecutive_ranges(std::shared_ptr exec, - array& range_start_ends, + const array& range_start_ends, bool* result) { struct end_start { @@ -77,8 +77,8 @@ void check_consecutive_ranges(std::shared_ptr exec, }; auto num_parts = range_start_ends.get_num_elems() / 2; - auto range_it = - reinterpret_cast(range_start_ends.get_data() + 1); + auto range_it = reinterpret_cast( + range_start_ends.get_const_data() + 1); if (num_parts) { *result = From 0cda7efa3783fdc60f44c7dfdf6487957fa889ab Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 21 Feb 2023 11:50:36 +0100 Subject: [PATCH 217/583] review update: - use stable sort Co-authored-by: Gregor Olenik --- .../cuda_hip/distributed/partition_helpers_kernels.hpp.inc | 4 ++-- dpcpp/distributed/partition_helpers_kernels.dp.cpp | 5 +++-- omp/distributed/partition_helpers_kernels.cpp | 7 ++++--- reference/distributed/partition_helpers_kernels.cpp | 7 ++++--- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc b/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc index 54d9d142df2..e3e8335dd22 100644 --- a/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc +++ b/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc @@ -46,8 +46,8 @@ void sort_by_range_start( range_start_ends.get_data() + 1, strided_indices); auto zip_it = thrust::make_zip_iterator( thrust::make_tuple(end_it, part_ids.get_data())); - thrust::sort_by_key(thrust::device, start_it, start_it + num_ranges, - zip_it); + thrust::stable_sort_by_key(thrust::device, start_it, start_it + num_ranges, + zip_it); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/dpcpp/distributed/partition_helpers_kernels.dp.cpp b/dpcpp/distributed/partition_helpers_kernels.dp.cpp index 3c4d437a750..b9823e1df9f 100644 --- a/dpcpp/distributed/partition_helpers_kernels.dp.cpp +++ b/dpcpp/distributed/partition_helpers_kernels.dp.cpp @@ -61,8 +61,9 @@ void sort_by_range_start( range_start_ends.get_data(), [](auto i) { return 2 * i + 1; }); auto zip_it = oneapi::dpl::make_zip_iterator(start_it, end_it, part_ids.get_data()); - std::sort(policy, zip_it, zip_it + num_ranges, - [](auto a, auto b) { return std::get<0>(a) < std::get<0>(b); }); + std::stable_sort(policy, zip_it, zip_it + num_ranges, [](auto a, auto b) { + return std::get<0>(a) < std::get<0>(b); + }); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/omp/distributed/partition_helpers_kernels.cpp b/omp/distributed/partition_helpers_kernels.cpp index 093e8f1ff51..5fc55862b08 100644 --- a/omp/distributed/partition_helpers_kernels.cpp +++ b/omp/distributed/partition_helpers_kernels.cpp @@ -58,9 +58,10 @@ void sort_by_range_start( auto range_it = reinterpret_cast(range_start_ends.get_data()); auto sort_it = detail::make_zip_iterator(range_it, part_ids_d); // TODO: use TBB or parallel std with c++17 - std::sort(sort_it, sort_it + num_parts, [](const auto& a, const auto& b) { - return std::get<0>(a).idxs[0] < std::get<0>(b).idxs[0]; - }); + std::stable_sort(sort_it, sort_it + num_parts, + [](const auto& a, const auto& b) { + return std::get<0>(a).idxs[0] < std::get<0>(b).idxs[0]; + }); } 
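// --- Editorial sketch, not part of the patch above ---
// A self-contained illustration of the sort step on the interleaved range
// layout: the [start, end) pairs are ordered by their start value and the
// part ids are permuted along with them, as the zip/permute-iterator based
// kernels above do. The helper name sort_by_range_start_host and the use of
// plain std::vector are assumptions for the sketch only; the sample data
// follows the reference test (CanSortByRangeStart).
#include <algorithm>
#include <cassert>
#include <numeric>
#include <vector>

void sort_by_range_start_host(std::vector<long>& start_ends,
                              std::vector<int>& part_ids)
{
    auto num_ranges = part_ids.size();
    // stable sort of an index permutation by the range starts
    std::vector<std::size_t> order(num_ranges);
    std::iota(order.begin(), order.end(), 0);
    std::stable_sort(order.begin(), order.end(),
                     [&](std::size_t a, std::size_t b) {
                         return start_ends[2 * a] < start_ends[2 * b];
                     });
    // apply the permutation to both the pairs and the part ids
    std::vector<long> sorted_ranges(start_ends.size());
    std::vector<int> sorted_ids(num_ranges);
    for (std::size_t i = 0; i < num_ranges; ++i) {
        sorted_ranges[2 * i] = start_ends[2 * order[i]];
        sorted_ranges[2 * i + 1] = start_ends[2 * order[i] + 1];
        sorted_ids[i] = part_ids[order[i]];
    }
    start_ends = std::move(sorted_ranges);
    part_ids = std::move(sorted_ids);
}

int main()
{
    std::vector<long> start_ends{7, 9, 4, 7, 0, 4, 9, 11};
    std::vector<int> part_ids{0, 1, 2, 3};
    sort_by_range_start_host(start_ends, part_ids);
    assert((start_ends == std::vector<long>{0, 4, 4, 7, 7, 9, 9, 11}));
    assert((part_ids == std::vector<int>{2, 1, 0, 3}));
}
// --- end of editorial sketch ---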
GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 989ac1cddb0..7f7dfce756c 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -57,9 +57,10 @@ void sort_by_range_start( auto num_parts = part_ids.get_num_elems(); auto range_it = reinterpret_cast(range_start_ends.get_data()); auto sort_it = detail::make_zip_iterator(range_it, part_ids_d); - std::sort(sort_it, sort_it + num_parts, [](const auto& a, const auto& b) { - return std::get<0>(a).idxs[0] < std::get<0>(b).idxs[0]; - }); + std::stable_sort(sort_it, sort_it + num_parts, + [](const auto& a, const auto& b) { + return std::get<0>(a).idxs[0] < std::get<0>(b).idxs[0]; + }); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( From 862e021321a9ae9a0a28c6f720c8b00e8e348f48 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 21 Feb 2023 12:05:35 +0100 Subject: [PATCH 218/583] fixing dpcpp --- .../partition_helpers_kernels.dp.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/dpcpp/distributed/partition_helpers_kernels.dp.cpp b/dpcpp/distributed/partition_helpers_kernels.dp.cpp index b9823e1df9f..6362c243d95 100644 --- a/dpcpp/distributed/partition_helpers_kernels.dp.cpp +++ b/dpcpp/distributed/partition_helpers_kernels.dp.cpp @@ -45,6 +45,20 @@ namespace kernels { namespace dpcpp { namespace partition_helpers { +struct stride { + // Some version requires [] while some requires (), so I added both + template + Index operator[](const Index& i) const + { + return i * 2; + } + + template + Index operator()(const Index& i) const + { + return operator[](i); + } +}; template void sort_by_range_start( @@ -55,10 +69,11 @@ void sort_by_range_start( auto policy = oneapi::dpl::execution::make_device_policy(*exec->get_queue()); auto num_ranges = range_start_ends.get_num_elems() / 2; + auto start_it = oneapi::dpl::make_permutation_iterator( - range_start_ends.get_data(), [](auto i) { return 2 * i; }); + range_start_ends.get_data(), stride{}); auto end_it = oneapi::dpl::make_permutation_iterator( - range_start_ends.get_data(), [](auto i) { return 2 * i + 1; }); + range_start_ends.get_data() + 1, stride{}); auto zip_it = oneapi::dpl::make_zip_iterator(start_it, end_it, part_ids.get_data()); std::stable_sort(policy, zip_it, zip_it + num_ranges, [](auto a, auto b) { From cc702fcb3048b5d67e2adeae142901ee3762832c Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 22 Feb 2023 10:51:31 +0100 Subject: [PATCH 219/583] don't mix host and device buffers for MPI --- core/distributed/partition_helpers.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index acc4d535519..b906c0b6e42 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -79,9 +79,13 @@ build_partition_from_local_range(std::shared_ptr exec, static_cast(local_range.end)}; // make all range_start_ends available on each rank - array ranges_start_end(exec, comm.size() * 2); + // note: not all combination of MPI + GPU library seem to support + // mixing host and device buffers, e.g. 
OpenMPI 4.0.5 and Rocm 4.0 + auto mpi_exec = exec->get_master(); + array ranges_start_end(mpi_exec, comm.size() * 2); ranges_start_end.fill(invalid_index()); - comm.all_gather(exec, range.data(), 2, ranges_start_end.get_data(), 2); + comm.all_gather(mpi_exec, range.data(), 2, ranges_start_end.get_data(), 2); + ranges_start_end.set_executor(exec); // make_sort_by_range_start array part_ids(exec, comm.size()); From d8642b9a5823e0996bec17e4c28bfaa7f1a52003 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 19 Apr 2023 09:18:51 +0200 Subject: [PATCH 220/583] adds permutation iterator Co-authored-by: Tobias Ribizel --- core/base/iterator_factory.hpp | 124 +++++++++++++++++++ core/test/base/iterator_factory.cpp | 181 +++++++++++++++++++++++++--- 2 files changed, 291 insertions(+), 14 deletions(-) diff --git a/core/base/iterator_factory.hpp b/core/base/iterator_factory.hpp index 7ebbc510f74..29aa99a4f86 100644 --- a/core/base/iterator_factory.hpp +++ b/core/base/iterator_factory.hpp @@ -366,6 +366,130 @@ void swap(zip_iterator_reference a, } +template +class permute_iterator { +public: + using difference_type = std::ptrdiff_t; + using value_type = typename std::iterator_traits::value_type; + using pointer = typename std::iterator_traits::pointer; + using reference = typename std::iterator_traits::reference; + using iterator_category = std::random_access_iterator_tag; + + explicit permute_iterator() = default; + + explicit permute_iterator(IteratorType it, PermuteFn perm) + : it_{std::move(it)}, idx_{}, perm_{std::move(perm)} + {} + + permute_iterator& operator=(permute_iterator other) + { + it_ = other.it_; + idx_ = other.idx_; + // no perm_ = other.perm_ because lambdas are not copy-assignable + return *this; + } + + permute_iterator& operator+=(difference_type i) + { + idx_ += i; + return *this; + } + + permute_iterator& operator-=(difference_type i) { return *this += -i; } + + permute_iterator& operator++() { return *this += 1; } + + permute_iterator operator++(int) + { + auto tmp = *this; + ++(*this); + return tmp; + } + + permute_iterator& operator--() { return *this -= 1; } + + permute_iterator operator--(int) + { + auto tmp = *this; + --(*this); + return tmp; + } + + permute_iterator operator+(difference_type i) const + { + auto tmp = *this; + tmp += i; + return tmp; + } + + friend permute_iterator operator+(difference_type i, + const permute_iterator& iter) + { + return iter + i; + } + + permute_iterator operator-(difference_type i) const + { + auto tmp = *this; + tmp -= i; + return tmp; + } + + difference_type operator-(const permute_iterator& other) const + { + return idx_ - other.idx_; + } + + reference operator*() const { return it_[perm_(idx_)]; } + + reference operator[](difference_type i) const { return *(*this + i); } + + bool operator==(const permute_iterator& other) const + { + return idx_ == other.idx_; + } + + bool operator!=(const permute_iterator& other) const + { + return !(*this == other); + } + + bool operator<(const permute_iterator& other) const + { + return idx_ < other.idx_; + } + + bool operator<=(const permute_iterator& other) const + { + return idx_ <= other.idx_; + } + + bool operator>(const permute_iterator& other) const + { + return !(*this <= other); + } + + bool operator>=(const permute_iterator& other) const + { + return !(*this < other); + } + +private: + IteratorType it_; + difference_type idx_; + PermuteFn perm_; +}; + + +template +permute_iterator make_permute_iterator( + IteratorType it, PermutationFn perm) +{ + return permute_iterator{std::move(it), 
+ std::move(perm)}; +} + + } // namespace detail } // namespace gko diff --git a/core/test/base/iterator_factory.cpp b/core/test/base/iterator_factory.cpp index 68ed87e07cb..f41181f0c10 100644 --- a/core/test/base/iterator_factory.cpp +++ b/core/test/base/iterator_factory.cpp @@ -67,13 +67,13 @@ namespace { template -class IteratorFactory : public ::testing::Test { +class ZipIterator : public ::testing::Test { protected: using value_type = typename std::tuple_element<0, decltype(ValueIndexType())>::type; using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; - IteratorFactory() + ZipIterator() : reversed_index{100, 50, 10, 9, 8, 7, 5, 5, 4, 3, 2, 1, 0, -1, -2}, ordered_index{-2, -1, 0, 1, 2, 3, 4, 5, 5, 7, 8, 9, 10, 50, 100}, reversed_value{15., 14., 13., 12., 11., 10., 9., 7., @@ -109,11 +109,11 @@ class IteratorFactory : public ::testing::Test { const std::vector ordered_value; }; -TYPED_TEST_SUITE(IteratorFactory, gko::test::ValueIndexTypes, +TYPED_TEST_SUITE(ZipIterator, gko::test::ValueIndexTypes, PairTypenameNameGenerator); -TYPED_TEST(IteratorFactory, EmptyIterator) +TYPED_TEST(ZipIterator, EmptyIterator) { using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; @@ -125,7 +125,7 @@ TYPED_TEST(IteratorFactory, EmptyIterator) } -TYPED_TEST(IteratorFactory, SortingReversedWithIterator) +TYPED_TEST(ZipIterator, SortingReversedWithIterator) { using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; @@ -140,7 +140,7 @@ TYPED_TEST(IteratorFactory, SortingReversedWithIterator) } -TYPED_TEST(IteratorFactory, SortingAlreadySortedWithIterator) +TYPED_TEST(ZipIterator, SortingAlreadySortedWithIterator) { using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; @@ -155,7 +155,7 @@ TYPED_TEST(IteratorFactory, SortingAlreadySortedWithIterator) } -TYPED_TEST(IteratorFactory, IteratorReferenceOperatorSmaller) +TYPED_TEST(ZipIterator, IteratorReferenceOperatorSmaller) { using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; @@ -170,7 +170,7 @@ TYPED_TEST(IteratorFactory, IteratorReferenceOperatorSmaller) } -TYPED_TEST(IteratorFactory, IteratorReferenceOperatorSmaller2) +TYPED_TEST(ZipIterator, IteratorReferenceOperatorSmaller2) { using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; @@ -185,7 +185,7 @@ TYPED_TEST(IteratorFactory, IteratorReferenceOperatorSmaller2) } -TYPED_TEST(IteratorFactory, IncreasingIterator) +TYPED_TEST(ZipIterator, IncreasingIterator) { using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; @@ -262,7 +262,7 @@ bool check_assertion_exit_code(int exit_code) } -TYPED_TEST(IteratorFactory, IncompatibleIteratorDeathTest) +TYPED_TEST(ZipIterator, IncompatibleIteratorDeathTest) { using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; @@ -286,7 +286,7 @@ TYPED_TEST(IteratorFactory, IncompatibleIteratorDeathTest) #endif -TYPED_TEST(IteratorFactory, DecreasingIterator) +TYPED_TEST(ZipIterator, DecreasingIterator) { using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; @@ -316,7 +316,7 @@ TYPED_TEST(IteratorFactory, DecreasingIterator) } -TYPED_TEST(IteratorFactory, CorrectDereferencing) +TYPED_TEST(ZipIterator, CorrectDereferencing) { using index_type_it = 
typename TestFixture::index_type; using value_type_it = typename TestFixture::value_type; @@ -337,7 +337,7 @@ TYPED_TEST(IteratorFactory, CorrectDereferencing) } -TYPED_TEST(IteratorFactory, CorrectSwapping) +TYPED_TEST(ZipIterator, CorrectSwapping) { using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; @@ -361,7 +361,7 @@ TYPED_TEST(IteratorFactory, CorrectSwapping) } -TYPED_TEST(IteratorFactory, CorrectHandWrittenSwapping) +TYPED_TEST(ZipIterator, CorrectHandWrittenSwapping) { using index_type = typename TestFixture::index_type; using value_type = typename TestFixture::value_type; @@ -388,4 +388,157 @@ TYPED_TEST(IteratorFactory, CorrectHandWrittenSwapping) } +template +class PermuteIterator : public ::testing::Test { +protected: + using value_type = ValueType; +}; + +TYPED_TEST_SUITE(PermuteIterator, gko::test::ValueAndIndexTypes, + TypenameNameGenerator); + + +TYPED_TEST(PermuteIterator, EmptyIterator) +{ + auto test_iter = gko::detail::make_permute_iterator( + nullptr, [](int i) { return i; }); + + ASSERT_NO_THROW(std::sort(test_iter, test_iter)); +} + + +TYPED_TEST(PermuteIterator, SortingWithIdentityPermutation) +{ + std::vector vec{6, 2, 5, 2, 4}; + std::vector sorted{2, 2, 4, 5, 6}; + + auto test_iter = gko::detail::make_permute_iterator( + vec.begin(), [](int i) { return i; }); + + std::sort(test_iter, test_iter + vec.size()); + + ASSERT_EQ(vec, sorted); +} + + +TYPED_TEST(PermuteIterator, SortingWithReversePermutation) +{ + std::vector vec{6, 2, 5, 2, 4}; + std::vector sorted{6, 5, 4, 2, 2}; + auto test_iter = gko::detail::make_permute_iterator( + vec.begin(), + [size = vec.size()](int i) { return static_cast(size) - 1 - i; }); + + std::sort(test_iter, test_iter + vec.size()); + + ASSERT_EQ(vec, sorted); +} + + +TYPED_TEST(PermuteIterator, SortingWithStridedPermutation) +{ + std::vector vec{6, 8, 2, 9, 5, 1, 2, 7, 4, 0}; + std::vector sorted{2, 8, 2, 9, 4, 1, 5, 7, 6, 0}; + + auto test_iter = gko::detail::make_permute_iterator( + vec.begin(), [](int i) { return 2 * i; }); + + std::sort(test_iter, test_iter + vec.size() / 2); + + ASSERT_EQ(vec, sorted); +} + + +TYPED_TEST(PermuteIterator, IncreasingIterator) +{ + std::vector vec{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + auto perm = [size = vec.size()](int i) { + return static_cast(size) - 1 - i; + }; + + auto test_iter = gko::detail::make_permute_iterator(vec.begin(), perm); + auto begin = test_iter; + auto plus_2 = begin + 2; + auto plus_2_rev = 2 + begin; + auto plus_minus_2 = plus_2 - 2; + auto increment_pre_2 = begin; + ++increment_pre_2; + ++increment_pre_2; + auto increment_post_2 = begin; + increment_post_2++; + increment_post_2++; + auto increment_pre_test = begin; + auto increment_post_test = begin; + + // check results for equality + ASSERT_TRUE(begin == plus_minus_2); + ASSERT_TRUE(plus_2 == increment_pre_2); + ASSERT_TRUE(plus_2_rev == increment_pre_2); + ASSERT_TRUE(increment_pre_2 == increment_post_2); + ASSERT_TRUE(begin == increment_post_test++); + ASSERT_TRUE(begin + 1 == ++increment_pre_test); + ASSERT_TRUE(*plus_2 == vec[perm(2)]); + // check other comparison operators and difference + std::vector::iterator, decltype(perm)>> + its{begin, + plus_2, + plus_2_rev, + plus_minus_2, + increment_pre_2, + increment_post_2, + increment_pre_test, + increment_post_test, + begin + 5, + begin + 9}; + std::sort(its.begin(), its.end()); + std::vector dists; + std::vector ref_dists{0, 1, 0, 1, 0, 0, 0, 3, 4}; + for (int i = 0; i < its.size() - 1; i++) { + SCOPED_TRACE(i); + 
dists.push_back(its[i + 1] - its[i]); + auto equal = dists.back() > 0; + ASSERT_EQ(its[i + 1] > its[i], equal); + ASSERT_EQ(its[i] < its[i + 1], equal); + ASSERT_EQ(its[i] != its[i + 1], equal); + ASSERT_EQ(its[i] == its[i + 1], !equal); + ASSERT_EQ(its[i] >= its[i + 1], !equal); + ASSERT_EQ(its[i + 1] <= its[i], !equal); + ASSERT_TRUE(its[i + 1] >= its[i]); + ASSERT_TRUE(its[i] <= its[i + 1]); + } + ASSERT_EQ(dists, ref_dists); +} + + +TYPED_TEST(PermuteIterator, DecreasingIterator) +{ + std::vector vec{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + auto perm = [size = vec.size()](int i) { + return static_cast(size) - 1 - i; + }; + + auto test_iter = gko::detail::make_permute_iterator(vec.begin(), perm); + + auto iter = test_iter + 5; + auto minus_2 = iter - 2; + auto minus_plus_2 = minus_2 + 2; + auto decrement_pre_2 = iter; + --decrement_pre_2; + --decrement_pre_2; + auto decrement_post_2 = iter; + decrement_post_2--; + decrement_post_2--; + auto decrement_pre_test = iter; + auto decrement_post_test = iter; + + ASSERT_TRUE(iter == minus_plus_2); + ASSERT_TRUE(minus_2 == decrement_pre_2); + ASSERT_TRUE(decrement_pre_2 == decrement_post_2); + ASSERT_TRUE(iter == decrement_post_test--); + ASSERT_TRUE(iter - 1 == --decrement_pre_test); + ASSERT_TRUE(*minus_2 == vec[perm(3)]); +} + + } // namespace From 8fd45071a9af1c6aa90bb4fc086463b876429a8d Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 19 Apr 2023 09:26:08 +0200 Subject: [PATCH 221/583] use permute iterator for STL algorithms --- omp/distributed/partition_helpers_kernels.cpp | 13 ++++---- .../distributed/partition_helpers_kernels.cpp | 33 +++++++++---------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/omp/distributed/partition_helpers_kernels.cpp b/omp/distributed/partition_helpers_kernels.cpp index 5fc55862b08..d03c21c0731 100644 --- a/omp/distributed/partition_helpers_kernels.cpp +++ b/omp/distributed/partition_helpers_kernels.cpp @@ -49,18 +49,17 @@ void sort_by_range_start( array& range_start_ends, array& part_ids) { - struct range { - GlobalIndexType idxs[2]; - }; - auto part_ids_d = part_ids.get_data(); auto num_parts = part_ids.get_num_elems(); - auto range_it = reinterpret_cast(range_start_ends.get_data()); - auto sort_it = detail::make_zip_iterator(range_it, part_ids_d); + auto start_it = detail::make_permute_iterator( + range_start_ends.get_data(), [](const auto i) { return 2 * i; }); + auto end_it = detail::make_permute_iterator( + range_start_ends.get_data() + 1, [](const auto i) { return 2 * i; }); + auto sort_it = detail::make_zip_iterator(start_it, end_it, part_ids_d); // TODO: use TBB or parallel std with c++17 std::stable_sort(sort_it, sort_it + num_parts, [](const auto& a, const auto& b) { - return std::get<0>(a).idxs[0] < std::get<0>(b).idxs[0]; + return std::get<0>(a) < std::get<0>(b); }); } diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 7f7dfce756c..b392dd362b4 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -49,17 +49,16 @@ void sort_by_range_start( array& range_start_ends, array& part_ids) { - struct range { - GlobalIndexType idxs[2]; - }; - auto part_ids_d = part_ids.get_data(); auto num_parts = part_ids.get_num_elems(); - auto range_it = reinterpret_cast(range_start_ends.get_data()); - auto sort_it = detail::make_zip_iterator(range_it, part_ids_d); + auto start_it = detail::make_permute_iterator( + range_start_ends.get_data(), [](const auto i) 
{ return 2 * i; }); + auto end_it = detail::make_permute_iterator( + range_start_ends.get_data() + 1, [](const auto i) { return 2 * i; }); + auto sort_it = detail::make_zip_iterator(start_it, end_it, part_ids_d); std::stable_sort(sort_it, sort_it + num_parts, [](const auto& a, const auto& b) { - return std::get<0>(a).idxs[0] < std::get<0>(b).idxs[0]; + return std::get<0>(a) < std::get<0>(b); }); } @@ -72,19 +71,19 @@ void check_consecutive_ranges(std::shared_ptr exec, const array& range_start_ends, bool* result) { - struct end_start { - GlobalIndexType end; - GlobalIndexType start; - }; - auto num_parts = range_start_ends.get_num_elems() / 2; - auto range_it = reinterpret_cast( - range_start_ends.get_const_data() + 1); + auto start_it = + detail::make_permute_iterator(range_start_ends.get_const_data() + 2, + [](const auto i) { return 2 * i; }); + auto end_it = + detail::make_permute_iterator(range_start_ends.get_const_data() + 1, + [](const auto i) { return 2 * i; }); + auto range_it = detail::make_zip_iterator(start_it, end_it); if (num_parts) { - *result = - std::all_of(range_it, range_it + num_parts - 1, - [](const end_start& r) { return r.end == r.start; }); + *result = std::all_of( + range_it, range_it + num_parts - 1, + [](const auto& r) { return std::get<0>(r) == std::get<1>(r); }); } else { *result = true; } From 830ed3b5221b7bc366a8fe1a50ad9d7c278a4efa Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 20 Apr 2023 09:49:42 +0200 Subject: [PATCH 222/583] review updates: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - removes more reinterpret casts - makes permute_iterator copy assignable Co-authored-by: Thomas Grützmacher Co-authored-by: Tobias Ribizel --- core/base/iterator_factory.hpp | 14 +++++---- core/distributed/partition_helpers.cpp | 3 +- core/test/base/iterator_factory.cpp | 2 -- .../distributed/partition_helpers_kernels.cpp | 2 +- test/distributed/partition_helper_kernels.cpp | 30 +++++++++---------- test/mpi/partition_helpers.cpp | 1 - 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/core/base/iterator_factory.hpp b/core/base/iterator_factory.hpp index 29aa99a4f86..76cf3dcf36d 100644 --- a/core/base/iterator_factory.hpp +++ b/core/base/iterator_factory.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
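// Illustrative sketch (stand-in host-side data, names hypothetical): the
// sort_by_range_start kernels above replace the reinterpret_cast-to-struct
// trick with the new iterators. Starts and ends are stored interleaved as
// [s0, e0, s1, e1, ...], so the index map i -> 2 * i exposes the starts (and,
// shifted by one element, the ends) as random-access ranges that can be zipped
// with the part ids and sorted together.
#include <algorithm>
#include <vector>
#include "core/base/iterator_factory.hpp"

void sort_ranges_sketch(std::vector<long>& start_ends, std::vector<int>& part_ids)
{
    auto stride2 = [](const auto i) { return 2 * i; };
    auto start_it = gko::detail::make_permute_iterator(start_ends.data(), stride2);
    auto end_it = gko::detail::make_permute_iterator(start_ends.data() + 1, stride2);
    auto sort_it = gko::detail::make_zip_iterator(start_it, end_it, part_ids.begin());
    // reorders the (start, end, part_id) triples by ascending range start
    std::stable_sort(sort_it, sort_it + part_ids.size(),
                     [](const auto& a, const auto& b) {
                         return std::get<0>(a) < std::get<0>(b);
                     });
}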
#include #include #include +#include namespace gko { @@ -375,17 +376,18 @@ class permute_iterator { using reference = typename std::iterator_traits::reference; using iterator_category = std::random_access_iterator_tag; - explicit permute_iterator() = default; + explicit permute_iterator() : it_{}, idx_{}, perm_{{}} {} explicit permute_iterator(IteratorType it, PermuteFn perm) : it_{std::move(it)}, idx_{}, perm_{std::move(perm)} {} - permute_iterator& operator=(permute_iterator other) + permute_iterator& operator=(const permute_iterator& other) { it_ = other.it_; idx_ = other.idx_; - // no perm_ = other.perm_ because lambdas are not copy-assignable + perm_.clear(); + perm_.emplace_back(other.perm_[0]); return *this; } @@ -440,7 +442,7 @@ class permute_iterator { return idx_ - other.idx_; } - reference operator*() const { return it_[perm_(idx_)]; } + reference operator*() const { return it_[perm_[0](idx_)]; } reference operator[](difference_type i) const { return *(*this + i); } @@ -477,7 +479,9 @@ class permute_iterator { private: IteratorType it_; difference_type idx_; - PermuteFn perm_; + // hack to make lambda function copy assignable + // could be better done with std::optional + std::vector perm_; }; diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index b906c0b6e42..921eeee19fd 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -32,11 +32,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include + + #include #include "core/components/fill_array_kernels.hpp" -#include "core/components/prefix_sum_kernels.hpp" #include "core/distributed/partition_helpers_kernels.hpp" diff --git a/core/test/base/iterator_factory.cpp b/core/test/base/iterator_factory.cpp index f41181f0c10..e4d8d39b340 100644 --- a/core/test/base/iterator_factory.cpp +++ b/core/test/base/iterator_factory.cpp @@ -411,7 +411,6 @@ TYPED_TEST(PermuteIterator, SortingWithIdentityPermutation) { std::vector vec{6, 2, 5, 2, 4}; std::vector sorted{2, 2, 4, 5, 6}; - auto test_iter = gko::detail::make_permute_iterator( vec.begin(), [](int i) { return i; }); @@ -439,7 +438,6 @@ TYPED_TEST(PermuteIterator, SortingWithStridedPermutation) { std::vector vec{6, 8, 2, 9, 5, 1, 2, 7, 4, 0}; std::vector sorted{2, 8, 2, 9, 4, 1, 5, 7, 6, 0}; - auto test_iter = gko::detail::make_permute_iterator( vec.begin(), [](int i) { return 2 * i; }); diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index b392dd362b4..9cbc425906d 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/distributed/partition_helpers_kernels.hpp" -#include + #include "core/base/iterator_factory.hpp" diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index fdfeb553ae1..bdf750e4675 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -85,14 +85,13 @@ template std::vector create_ranges( const std::vector& range_offsets) { - struct repeated_value { - repeated_value(IndexType i) : vals{i, i} {} - IndexType vals[2]; - }; + assert(range_offsets.size() >= 2); gko::size_type num_ranges = range_offsets.size() - 1; std::vector ranges(num_ranges * 2, 0); - auto ranges_it = reinterpret_cast(ranges.data() + 1); - std::copy(range_offsets.begin() + 1, range_offsets.end() - 1, ranges_it); + for (gko::size_type i = 1; i < num_ranges; ++i) { + ranges[2 * i - 1] = range_offsets[i]; + ranges[2 * i] = range_offsets[i]; + } ranges.back() = range_offsets.back(); return ranges; } @@ -145,17 +144,17 @@ std::pair, std::vector> shuffle_range_and_pid(const std::vector& ranges, const std::vector& pid) { - struct range { - IndexType vals[2]; - }; - std::default_random_engine engine; auto result = std::make_pair(ranges, pid); auto num_ranges = result.second.size(); - auto zip_it = gko::detail::make_zip_iterator( - reinterpret_cast(result.first.data()), result.second.begin()); + auto range_start_it = gko::detail::make_permute_iterator( + result.first.begin(), [](const auto i) { return 2 * i; }); + auto range_end_it = gko::detail::make_permute_iterator( + result.first.begin() + 1, [](const auto i) { return 2 * i; }); + auto zip_it = gko::detail::make_zip_iterator(range_start_it, range_end_it, + result.second.begin()); std::shuffle(zip_it, zip_it + num_ranges, engine); return result; @@ -199,11 +198,10 @@ TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) } - TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleRange) +TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleRange) { using index_type = typename TestFixture::index_type; - auto start_ends = - make_array(this->ref, create_ranges(1)); + auto start_ends = make_array(this->ref, create_ranges(1)); bool result = false; gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( @@ -213,7 +211,7 @@ TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) } - TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleElement) +TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleElement) { using index_type = typename TestFixture::index_type; auto start_ends = gko::array(this->exec, {1}); diff --git a/test/mpi/partition_helpers.cpp b/test/mpi/partition_helpers.cpp index 6f30761cbb0..dc99bb0a4ab 100644 --- a/test/mpi/partition_helpers.cpp +++ b/test/mpi/partition_helpers.cpp @@ -58,7 +58,6 @@ TYPED_TEST(PartitionHelpers, CanBuildFromLocalRanges) gko::array expects_ranges{this->exec, {0, 4, 9, 11}}; gko::array expects_pid{this->exec, {0, 1, 2}}; - auto part = gko::experimental::distributed::build_partition_from_local_range< gko::int32, itype>(this->exec, this->comm, From 690e89b1d6b05595d28a68f2740ce6984e88606e Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 20 Apr 2023 14:27:45 +0200 Subject: [PATCH 223/583] review updates: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - use gko::array instead of gko::vector Co-authored-by: Thomas Grützmacher --- core/distributed/partition_helpers.cpp | 15 +++++++-------- 1 file changed, 7 
insertions(+), 8 deletions(-) diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 921eeee19fd..3a3f06cef77 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -127,17 +127,16 @@ build_partition_from_local_size(std::shared_ptr exec, mpi::communicator comm, size_type local_size) { auto local_size_gi = static_cast(local_size); - std::vector sizes(comm.size()); - comm.all_gather(exec, &local_size_gi, 1, sizes.data(), 1); + array sizes(exec->get_master(), comm.size()); + comm.all_gather(exec, &local_size_gi, 1, sizes.get_data(), 1); - std::vector offsets(comm.size() + 1); - offsets[0] = 0; - std::partial_sum(sizes.begin(), sizes.end(), offsets.begin() + 1); + array offsets(exec->get_master(), comm.size() + 1); + offsets.get_data()[0] = 0; + std::partial_sum(sizes.get_data(), sizes.get_data() + comm.size(), + offsets.get_data() + 1); - auto ranges = - make_array_view(exec->get_master(), offsets.size(), offsets.data()); return Partition::build_from_contiguous( - exec, ranges); + exec, offsets); } #define GKO_DECLARE_BUILD_PARTITION_FROM_LOCAL_SIZE(_local_type, _global_type) \ From 721d8292650265661d17f1869837ec3a5e29cc37 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 20 Apr 2023 15:59:23 +0200 Subject: [PATCH 224/583] adds copy_assignable wrapper class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this allows to rely on default implementation of the constructors and assignment operators in the `permute_iterator`. Except for the default constructor due to AppleClang issues Co-authored-by: Thomas Grützmacher Co-authored-by: Tobias Ribizel --- core/base/copy_assignable.hpp | 62 ++++++++++++++++++++++++++++++++++ core/base/iterator_factory.hpp | 20 +++-------- 2 files changed, 67 insertions(+), 15 deletions(-) create mode 100644 core/base/copy_assignable.hpp diff --git a/core/base/copy_assignable.hpp b/core/base/copy_assignable.hpp new file mode 100644 index 00000000000..2c29e4e64e4 --- /dev/null +++ b/core/base/copy_assignable.hpp @@ -0,0 +1,62 @@ +#ifndef GKO_CORE_BASE_COPY_ASSIGNABLE_HPP +#define GKO_CORE_BASE_COPY_ASSIGNABLE_HPP + + +#include + + +namespace gko { +namespace detail { + + +template +class copy_assignable; + + +/** + * Helper class to make a type copy assignable. + * + * This class wraps an object of a type that has a copy constructor, but not + * a copy assignment. This is most often the case for lambdas. The wrapped + * object can then be copy assigned, by relying on the copy constructor. + * + * @tparam T type with a copy constructor + */ +template +class copy_assignable< + T, typename std::enable_if::value>::type> { +public: + copy_assignable() : obj_{{}} {} + copy_assignable(const copy_assignable& other) = default; + copy_assignable(copy_assignable&& other) noexcept = default; + + copy_assignable(const T& obj) : obj_{obj} {} + copy_assignable(T&& obj) : obj_{std::move(obj)} {} + + copy_assignable& operator=(const copy_assignable& other) + { + obj_.clear(); + obj_.emplace_back(other.get()); + return *this; + } + copy_assignable& operator=(copy_assignable&& other) noexcept = default; + + template + decltype(auto) operator()(Args&&... 
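// Worked example (hypothetical sizes): build_partition_from_local_size above
// all-gathers one local size per rank and forms contiguous offsets with a
// prefix sum: local sizes {4, 5, 2} on ranks 0..2 give offsets {0, 4, 9, 11},
// which build_from_contiguous then reads as rank 0 -> [0, 4),
// rank 1 -> [4, 9) and rank 2 -> [9, 11).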
args) const + { + return obj_[0](std::forward(args)...); + } + + T const& get() const { return obj_[0]; } + T& get() { return obj_[0]; } + +private: + //!< Store wrapped object in a container that has an emplace function + std::vector obj_; +}; + + +} // namespace detail +} // namespace gko + +#endif // GKO_CORE_BASE_COPY_ASSIGNABLE_HPP diff --git a/core/base/iterator_factory.hpp b/core/base/iterator_factory.hpp index 76cf3dcf36d..1423803555c 100644 --- a/core/base/iterator_factory.hpp +++ b/core/base/iterator_factory.hpp @@ -40,7 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include + +#include namespace gko { @@ -376,21 +377,12 @@ class permute_iterator { using reference = typename std::iterator_traits::reference; using iterator_category = std::random_access_iterator_tag; - explicit permute_iterator() : it_{}, idx_{}, perm_{{}} {} + permute_iterator() = default; explicit permute_iterator(IteratorType it, PermuteFn perm) : it_{std::move(it)}, idx_{}, perm_{std::move(perm)} {} - permute_iterator& operator=(const permute_iterator& other) - { - it_ = other.it_; - idx_ = other.idx_; - perm_.clear(); - perm_.emplace_back(other.perm_[0]); - return *this; - } - permute_iterator& operator+=(difference_type i) { idx_ += i; @@ -442,7 +434,7 @@ class permute_iterator { return idx_ - other.idx_; } - reference operator*() const { return it_[perm_[0](idx_)]; } + reference operator*() const { return it_[perm_(idx_)]; } reference operator[](difference_type i) const { return *(*this + i); } @@ -479,9 +471,7 @@ class permute_iterator { private: IteratorType it_; difference_type idx_; - // hack to make lambda function copy assignable - // could be better done with std::optional - std::vector perm_; + copy_assignable perm_; }; From 84750fc5afe78d2dec1ccacc0b04a7d90ca27065 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 26 Apr 2023 15:46:45 +0200 Subject: [PATCH 225/583] adds invalid state exception Co-authored-by: Pratik Nayak Co-authored-by: Tobias Ribizel --- core/distributed/partition_helpers.cpp | 2 +- test/mpi/partition_helpers.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 3a3f06cef77..70ae3897a52 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -100,7 +100,7 @@ build_partition_from_local_range(std::shared_ptr exec, exec->run(partition_helpers::make_check_consecutive_ranges( ranges_start_end, &consecutive_ranges)); if (!consecutive_ranges) { - throw Error(__FILE__, __LINE__, "The partition contains gaps."); + GKO_INVALID_STATE("The partition contains gaps."); } // join (now consecutive) starts and ends into combined array diff --git a/test/mpi/partition_helpers.cpp b/test/mpi/partition_helpers.cpp index dc99bb0a4ab..de0b897fd13 100644 --- a/test/mpi/partition_helpers.cpp +++ b/test/mpi/partition_helpers.cpp @@ -109,7 +109,7 @@ TYPED_TEST(PartitionHelpers, CanBuildFromLocalRangesThrowsOnGap) ASSERT_THROW(build_from_local_ranges(this->exec, this->comm, local_range[this->comm.rank()]), - gko::Error); + gko::InvalidStateError); } From c20213995bbad39bc2be2334f40c8fbd5863df74 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 26 Apr 2023 15:59:25 +0200 Subject: [PATCH 226/583] review updates: - documentation - formatting Co-authored-by: Pratik Nayak --- core/base/iterator_factory.hpp | 11 +++++++++++ test/distributed/partition_helper_kernels.cpp | 1 + 2 files changed, 12 
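// Minimal usage sketch of the copy_assignable helper introduced above
// (variable names and values are hypothetical): a capturing lambda is
// copy-constructible but not copy-assignable, while std::sort may assign
// iterators, and permute_iterator stores its permutation functor by value.
auto shift = [offset = 1](int i) { return i + offset; };
// decltype(shift) a = shift; a = shift;  // would not compile: deleted copy assignment
gko::detail::copy_assignable<decltype(shift)> a{shift};
gko::detail::copy_assignable<decltype(shift)> b{shift};
b = a;            // ok: rebuilds the wrapped lambda from a via its copy constructor
int j = b(41);    // forwards the call to the wrapped lambda, j == 42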
insertions(+) diff --git a/core/base/iterator_factory.hpp b/core/base/iterator_factory.hpp index 1423803555c..e1ed0ca62d0 100644 --- a/core/base/iterator_factory.hpp +++ b/core/base/iterator_factory.hpp @@ -368,6 +368,17 @@ void swap(zip_iterator_reference a, } +/** + * Random access iterator that uses a function to transform the index. + * + * For a function `fn` and an underlying iterator `it`, accessing the + * permute_iterator at index `i` will result in accessing `it[fn(i)]`. + * + * @tparam IteratorType Underlying iterator, has to be random access. + * @tparam PermuteFn A function `difference_type -> difference_type` that + * transforms any given index. It doesn't have to be a strict + * permutation of indices (i.e. not bijective). + */ template class permute_iterator { public: diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index bdf750e4675..44c514093d8 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -160,6 +160,7 @@ shuffle_range_and_pid(const std::vector& ranges, return result; } + template class PartitionHelpers : public CommonTestFixture { protected: From a433592932d00692381118b58d2557b2d65bbefe Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Wed, 26 Apr 2023 14:08:32 +0000 Subject: [PATCH 227/583] Format files Co-authored-by: Marcel Koch --- .../distributed/partition_helpers_kernels.cpp | 1 - core/base/copy_assignable.hpp | 38 +++++++++++++++++-- core/base/iterator_factory.hpp | 3 +- core/distributed/partition.cpp | 1 + core/distributed/partition_helpers.cpp | 4 +- cuda/distributed/partition_helpers_kernels.cu | 1 - include/ginkgo/core/base/mpi.hpp | 11 ++---- .../core/distributed/partition_helpers.hpp | 2 +- omp/distributed/partition_helpers_kernels.cpp | 1 - .../distributed/partition_helpers_kernels.cpp | 1 - .../distributed/partition_helpers_kernels.cpp | 4 +- test/distributed/partition_helper_kernels.cpp | 4 +- 12 files changed, 48 insertions(+), 23 deletions(-) diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index e5565819021..dbd20c40c15 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #include "core/distributed/partition_helpers_kernels.hpp" diff --git a/core/base/copy_assignable.hpp b/core/base/copy_assignable.hpp index 2c29e4e64e4..199a02e8388 100644 --- a/core/base/copy_assignable.hpp +++ b/core/base/copy_assignable.hpp @@ -1,5 +1,37 @@ -#ifndef GKO_CORE_BASE_COPY_ASSIGNABLE_HPP -#define GKO_CORE_BASE_COPY_ASSIGNABLE_HPP +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_BASE_COPY_ASSIGNABLE_HPP_ +#define GKO_CORE_BASE_COPY_ASSIGNABLE_HPP_ #include @@ -59,4 +91,4 @@ class copy_assignable< } // namespace detail } // namespace gko -#endif // GKO_CORE_BASE_COPY_ASSIGNABLE_HPP +#endif // GKO_CORE_BASE_COPY_ASSIGNABLE_HPP_ diff --git a/core/base/iterator_factory.hpp b/core/base/iterator_factory.hpp index e1ed0ca62d0..bbc1d3b4b2b 100644 --- a/core/base/iterator_factory.hpp +++ b/core/base/iterator_factory.hpp @@ -41,7 +41,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include + +#include "core/base/copy_assignable.hpp" namespace gko { diff --git a/core/distributed/partition.cpp b/core/distributed/partition.cpp index 22f0fdb3d94..bfeb5e8c286 100644 --- a/core/distributed/partition.cpp +++ b/core/distributed/partition.cpp @@ -32,6 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include + #include "core/distributed/partition_kernels.hpp" diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 70ae3897a52..9085b7ec2e7 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -30,13 +30,15 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include #include #include +#include + + #include "core/components/fill_array_kernels.hpp" #include "core/distributed/partition_helpers_kernels.hpp" diff --git a/cuda/distributed/partition_helpers_kernels.cu b/cuda/distributed/partition_helpers_kernels.cu index e37655e357e..a70e728f845 100644 --- a/cuda/distributed/partition_helpers_kernels.cu +++ b/cuda/distributed/partition_helpers_kernels.cu @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ - #include "core/distributed/partition_helpers_kernels.hpp" diff --git a/include/ginkgo/core/base/mpi.hpp b/include/ginkgo/core/base/mpi.hpp index 9699dea4942..bf985cabeb7 100644 --- a/include/ginkgo/core/base/mpi.hpp +++ b/include/ginkgo/core/base/mpi.hpp @@ -87,13 +87,10 @@ inline constexpr bool is_gpu_aware() int map_rank_to_device_id(MPI_Comm comm, int num_devices); -#define GKO_REGISTER_MPI_TYPE(input_type, mpi_type) \ - template <> \ - struct type_impl { \ - static MPI_Datatype get_type() \ - { \ - return mpi_type; \ - } \ +#define GKO_REGISTER_MPI_TYPE(input_type, mpi_type) \ + template <> \ + struct type_impl { \ + static MPI_Datatype get_type() { return mpi_type; } \ } /** diff --git a/include/ginkgo/core/distributed/partition_helpers.hpp b/include/ginkgo/core/distributed/partition_helpers.hpp index 889347674c8..6bc20350a7d 100644 --- a/include/ginkgo/core/distributed/partition_helpers.hpp +++ b/include/ginkgo/core/distributed/partition_helpers.hpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "ginkgo/core/base/range.hpp" +#include namespace gko { diff --git a/omp/distributed/partition_helpers_kernels.cpp b/omp/distributed/partition_helpers_kernels.cpp index d03c21c0731..2c006a22885 100644 --- a/omp/distributed/partition_helpers_kernels.cpp +++ b/omp/distributed/partition_helpers_kernels.cpp @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #include "core/distributed/partition_helpers_kernels.hpp" diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index 9cbc425906d..a9b476d0315 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -30,7 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ - #include "core/distributed/partition_helpers_kernels.hpp" diff --git a/reference/test/distributed/partition_helpers_kernels.cpp b/reference/test/distributed/partition_helpers_kernels.cpp index 9b339fd926f..5a139f4edb5 100644 --- a/reference/test/distributed/partition_helpers_kernels.cpp +++ b/reference/test/distributed/partition_helpers_kernels.cpp @@ -30,9 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include - - #include #include #include @@ -43,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "core/distributed/partition_helpers_kernels.hpp" diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index 44c514093d8..d43062d3ccd 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -30,9 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#include "core/distributed/partition_helpers_kernels.hpp" - - #include #include @@ -41,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/iterator_factory.hpp" +#include "core/distributed/partition_helpers_kernels.hpp" #include "core/test/utils.hpp" #include "test/utils/executor.hpp" From 763131d33c351d64e5d18f9a433a17ba69ae9b4b Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 27 Apr 2023 14:29:24 +0200 Subject: [PATCH 228/583] uses placement-new for copy-assignable wrapper Co-authored-by: Tobias Ribizel --- core/base/copy_assignable.hpp | 47 +++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/core/base/copy_assignable.hpp b/core/base/copy_assignable.hpp index 199a02e8388..40948757701 100644 --- a/core/base/copy_assignable.hpp +++ b/core/base/copy_assignable.hpp @@ -58,20 +58,45 @@ template class copy_assignable< T, typename std::enable_if::value>::type> { public: - copy_assignable() : obj_{{}} {} - copy_assignable(const copy_assignable& other) = default; - copy_assignable(copy_assignable&& other) noexcept = default; + copy_assignable() : obj_(new(buf)(T)()) {} - copy_assignable(const T& obj) : obj_{obj} {} - copy_assignable(T&& obj) : obj_{std::move(obj)} {} + copy_assignable(const copy_assignable& other) + { + if (this != &other) { + *this = other; + } + } + + copy_assignable(copy_assignable&& other) noexcept + { + if (this != &other) { + *this = std::move(other); + } + } + + copy_assignable(const T& obj) : obj_{new(buf)(T)(obj)} {} + + copy_assignable(T&& obj) : obj_{new(buf)(T)(std::move(obj))} {} copy_assignable& operator=(const copy_assignable& other) { - obj_.clear(); - obj_.emplace_back(other.get()); + if (this != &other) { + obj_->~T(); + obj_ = new (buf)(T)(*other.obj_); + } return *this; } - copy_assignable& operator=(copy_assignable&& other) noexcept = default; + + copy_assignable& operator=(copy_assignable&& other) noexcept + { + if (this != &other) { + obj_->~T(); + obj_ = new (buf)(T)(std::move(*other.obj_)); + } + return *this; + } + + ~copy_assignable() { obj_->~T(); } template decltype(auto) operator()(Args&&... 
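// Note on the placement-new variant above: the wrapped object now lives in
// aligned in-class storage instead of a heap-backed std::vector. Assignment
// replaces the stored value by ending the current object's lifetime (when one
// is present) with an explicit destructor call and copy-constructing the
// incoming value into the same buffer:
//     obj_->~T();                          // destroy the current value
//     obj_ = new (buf) T(*other.obj_);     // rebuild it in place from other
// (std::optional would express the same idea more directly once C++17 is
// available.)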
args) const @@ -80,11 +105,13 @@ class copy_assignable< } T const& get() const { return obj_[0]; } + T& get() { return obj_[0]; } private: - //!< Store wrapped object in a container that has an emplace function - std::vector obj_; + //!< Store wrapped object on the stack, should use std::optional in c++17 + T* obj_; + alignas(T) unsigned char buf[sizeof(T)]; }; From 32b7e621bdb49e530494429c2259e3f15174e222 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Thu, 27 Apr 2023 15:52:34 +0000 Subject: [PATCH 229/583] Format files Co-authored-by: Marcel Koch --- core/base/copy_assignable.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/base/copy_assignable.hpp b/core/base/copy_assignable.hpp index 40948757701..5d3985c5c20 100644 --- a/core/base/copy_assignable.hpp +++ b/core/base/copy_assignable.hpp @@ -58,7 +58,7 @@ template class copy_assignable< T, typename std::enable_if::value>::type> { public: - copy_assignable() : obj_(new(buf)(T)()) {} + copy_assignable() : obj_(new (buf)(T)()) {} copy_assignable(const copy_assignable& other) { @@ -74,9 +74,9 @@ class copy_assignable< } } - copy_assignable(const T& obj) : obj_{new(buf)(T)(obj)} {} + copy_assignable(const T& obj) : obj_{new (buf)(T)(obj)} {} - copy_assignable(T&& obj) : obj_{new(buf)(T)(std::move(obj))} {} + copy_assignable(T&& obj) : obj_{new (buf)(T)(std::move(obj))} {} copy_assignable& operator=(const copy_assignable& other) { From f2af1f998bdf4499342b80dedf584824b9b903f9 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 28 Apr 2023 09:52:39 +0200 Subject: [PATCH 230/583] remove undefined lambda default constructor --- core/base/copy_assignable.hpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/core/base/copy_assignable.hpp b/core/base/copy_assignable.hpp index 5d3985c5c20..de552831a86 100644 --- a/core/base/copy_assignable.hpp +++ b/core/base/copy_assignable.hpp @@ -58,7 +58,7 @@ template class copy_assignable< T, typename std::enable_if::value>::type> { public: - copy_assignable() : obj_(new (buf)(T)()) {} + copy_assignable() = default; copy_assignable(const copy_assignable& other) { @@ -74,14 +74,16 @@ class copy_assignable< } } - copy_assignable(const T& obj) : obj_{new (buf)(T)(obj)} {} + copy_assignable(const T& obj) : obj_{new(buf)(T)(obj)} {} - copy_assignable(T&& obj) : obj_{new (buf)(T)(std::move(obj))} {} + copy_assignable(T&& obj) : obj_{new(buf)(T)(std::move(obj))} {} copy_assignable& operator=(const copy_assignable& other) { if (this != &other) { - obj_->~T(); + if (obj_) { + obj_->~T(); + } obj_ = new (buf)(T)(*other.obj_); } return *this; @@ -90,13 +92,20 @@ class copy_assignable< copy_assignable& operator=(copy_assignable&& other) noexcept { if (this != &other) { - obj_->~T(); + if (obj_) { + obj_->~T(); + } obj_ = new (buf)(T)(std::move(*other.obj_)); } return *this; } - ~copy_assignable() { obj_->~T(); } + ~copy_assignable() + { + if (obj_) { + obj_->~T(); + } + } template decltype(auto) operator()(Args&&... 
args) const From 85e43e83a73d6ae4f9228766dda9ccefe268685d Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 5 Jul 2023 17:34:12 +0200 Subject: [PATCH 231/583] use workaround for old dpcpp version --- .../partition_helpers_kernels.dp.cpp | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/dpcpp/distributed/partition_helpers_kernels.dp.cpp b/dpcpp/distributed/partition_helpers_kernels.dp.cpp index 6362c243d95..46f72c8ef58 100644 --- a/dpcpp/distributed/partition_helpers_kernels.dp.cpp +++ b/dpcpp/distributed/partition_helpers_kernels.dp.cpp @@ -74,11 +74,31 @@ void sort_by_range_start( range_start_ends.get_data(), stride{}); auto end_it = oneapi::dpl::make_permutation_iterator( range_start_ends.get_data() + 1, stride{}); + + // older versions of oneDPL have a bug when sorting permutation iterators +#if ONEDPL_VERSION_MAJOR >= 2022 && ONEDPL_VERSION_MINOR >= 1 auto zip_it = oneapi::dpl::make_zip_iterator(start_it, end_it, part_ids.get_data()); std::stable_sort(policy, zip_it, zip_it + num_ranges, [](auto a, auto b) { return std::get<0>(a) < std::get<0>(b); }); +#else + array starts(exec, num_ranges); + array ends(exec, num_ranges); + + std::copy(policy, start_it, start_it + num_ranges, starts.get_data()); + std::copy(policy, end_it, end_it + num_ranges, ends.get_data()); + + auto zip_it = oneapi::dpl::make_zip_iterator( + starts.get_data(), ends.get_data(), part_ids.get_data()); + std::stable_sort(policy, zip_it, zip_it + num_ranges, [](auto a, auto b) { + return std::get<0>(a) < std::get<0>(b); + }); + + std::copy(policy, starts.get_data(), starts.get_data() + num_ranges, + start_it); + std::copy(policy, ends.get_data(), ends.get_data() + num_ranges, end_it); +#endif } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( From 5327dd54c27bfc281ea278e46b3c6761d1f0849e Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 7 Jul 2023 09:38:28 +0200 Subject: [PATCH 232/583] fixup after rebase --- test/mpi/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/mpi/CMakeLists.txt b/test/mpi/CMakeLists.txt index 08050bde58f..fc0aec8138a 100644 --- a/test/mpi/CMakeLists.txt +++ b/test/mpi/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_common_and_reference_test(matrix MPI_SIZE 3) +ginkgo_create_common_and_reference_test(partition_helpers MPI_SIZE 3) ginkgo_create_common_and_reference_test(vector MPI_SIZE 3) add_subdirectory(preconditioner) From 0e948b5f168d5ae7ee64863c386840dd40f95dee Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 7 Jul 2023 12:23:50 +0200 Subject: [PATCH 233/583] review updates: - initialize pointer - dereference pointer instead of array access - use `bool&` as return type instead of `bool*` Co-authored-by: Tobias Ribizel --- .../distributed/partition_helpers_kernels.cpp | 6 +++--- core/base/copy_assignable.hpp | 12 ++++++------ core/distributed/partition_helpers.cpp | 2 +- core/distributed/partition_helpers_kernels.hpp | 2 +- reference/distributed/partition_helpers_kernels.cpp | 6 +++--- .../test/distributed/partition_helpers_kernels.cpp | 4 ++-- test/distributed/partition_helper_kernels.cpp | 8 ++++---- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index dbd20c40c15..3c041dd7e4b 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -46,7 +46,7 @@ namespace partition_helpers { template void check_consecutive_ranges(std::shared_ptr exec, const 
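// Note on the fallback branch above (for oneDPL versions without the fix): the
// permutation iterators are not handed to the sorter directly. Instead the
// strided starts and ends are first copied into contiguous temporary arrays, a
// zip of (starts, ends, part_ids) is stable-sorted by range start, and the
// sorted values are copied back into the interleaved storage. This costs two
// extra arrays of length num_ranges but sidesteps the oneDPL bug with sorting
// permutation iterators.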
array& range_start_ends, - bool* result) + bool& result) { array result_uint32{exec, 1}; auto num_ranges = range_start_ends.get_num_elems() / 2; @@ -64,10 +64,10 @@ void check_consecutive_ranges(std::shared_ptr exec, [] GKO_KERNEL(auto x) { return x; }, static_cast(true), result_uint32.get_data(), num_ranges - 1, range_start_ends.get_const_data() + 1); - *result = + result = static_cast(exec->copy_val_to_host(result_uint32.get_data())); } else { - *result = true; + result = true; } } diff --git a/core/base/copy_assignable.hpp b/core/base/copy_assignable.hpp index de552831a86..7f5e4125e10 100644 --- a/core/base/copy_assignable.hpp +++ b/core/base/copy_assignable.hpp @@ -74,9 +74,9 @@ class copy_assignable< } } - copy_assignable(const T& obj) : obj_{new(buf)(T)(obj)} {} + copy_assignable(const T& obj) : obj_{new (buf)(T)(obj)} {} - copy_assignable(T&& obj) : obj_{new(buf)(T)(std::move(obj))} {} + copy_assignable(T&& obj) : obj_{new (buf)(T)(std::move(obj))} {} copy_assignable& operator=(const copy_assignable& other) { @@ -110,16 +110,16 @@ class copy_assignable< template decltype(auto) operator()(Args&&... args) const { - return obj_[0](std::forward(args)...); + return (*obj_)(std::forward(args)...); } - T const& get() const { return obj_[0]; } + T const& get() const { return *obj_; } - T& get() { return obj_[0]; } + T& get() { return *obj_; } private: //!< Store wrapped object on the stack, should use std::optional in c++17 - T* obj_; + T* obj_{}; alignas(T) unsigned char buf[sizeof(T)]; }; diff --git a/core/distributed/partition_helpers.cpp b/core/distributed/partition_helpers.cpp index 9085b7ec2e7..b1fd1dd9bc5 100644 --- a/core/distributed/partition_helpers.cpp +++ b/core/distributed/partition_helpers.cpp @@ -100,7 +100,7 @@ build_partition_from_local_range(std::shared_ptr exec, // check for consistency bool consecutive_ranges = false; exec->run(partition_helpers::make_check_consecutive_ranges( - ranges_start_end, &consecutive_ranges)); + ranges_start_end, consecutive_ranges)); if (!consecutive_ranges) { GKO_INVALID_STATE("The partition contains gaps."); } diff --git a/core/distributed/partition_helpers_kernels.hpp b/core/distributed/partition_helpers_kernels.hpp index 6d55926db76..ed9fa60364f 100644 --- a/core/distributed/partition_helpers_kernels.hpp +++ b/core/distributed/partition_helpers_kernels.hpp @@ -54,7 +54,7 @@ namespace kernels { #define GKO_DECLARE_PARTITION_HELPERS_CHECK_CONSECUTIVE_RANGES(_type) \ void check_consecutive_ranges(std::shared_ptr exec, \ const array<_type>& range_start_ends, \ - bool* result) + bool& result) #define GKO_DECLARE_PARTITION_HELPERS_COMPRESS_RANGES(_type) \ diff --git a/reference/distributed/partition_helpers_kernels.cpp b/reference/distributed/partition_helpers_kernels.cpp index a9b476d0315..b68c10b1d01 100644 --- a/reference/distributed/partition_helpers_kernels.cpp +++ b/reference/distributed/partition_helpers_kernels.cpp @@ -68,7 +68,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( template void check_consecutive_ranges(std::shared_ptr exec, const array& range_start_ends, - bool* result) + bool& result) { auto num_parts = range_start_ends.get_num_elems() / 2; auto start_it = @@ -80,11 +80,11 @@ void check_consecutive_ranges(std::shared_ptr exec, auto range_it = detail::make_zip_iterator(start_it, end_it); if (num_parts) { - *result = std::all_of( + result = std::all_of( range_it, range_it + num_parts - 1, [](const auto& r) { return std::get<0>(r) == std::get<1>(r); }); } else { - *result = true; + result = true; } } diff --git 
a/reference/test/distributed/partition_helpers_kernels.cpp b/reference/test/distributed/partition_helpers_kernels.cpp index 5a139f4edb5..f0ce4918d01 100644 --- a/reference/test/distributed/partition_helpers_kernels.cpp +++ b/reference/test/distributed/partition_helpers_kernels.cpp @@ -107,7 +107,7 @@ TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRanges) bool result = false; gko::kernels::reference::partition_helpers::check_consecutive_ranges( - this->ref, range_start_ends, &result); + this->ref, range_start_ends, result); ASSERT_TRUE(result); } @@ -121,7 +121,7 @@ TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) bool result = true; gko::kernels::reference::partition_helpers::check_consecutive_ranges( - this->ref, range_start_ends, &result); + this->ref, range_start_ends, result); ASSERT_FALSE(result); } diff --git a/test/distributed/partition_helper_kernels.cpp b/test/distributed/partition_helper_kernels.cpp index d43062d3ccd..a53505cf1f6 100644 --- a/test/distributed/partition_helper_kernels.cpp +++ b/test/distributed/partition_helper_kernels.cpp @@ -175,7 +175,7 @@ TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRanges) bool result = false; gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( - this->exec, offsets, &result); + this->exec, offsets, result); ASSERT_TRUE(result); } @@ -191,7 +191,7 @@ TYPED_TEST(PartitionHelpers, CanCheckNonConsecutiveRanges) bool result = true; gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( - this->exec, start_ends, &result); + this->exec, start_ends, result); ASSERT_FALSE(result); } @@ -204,7 +204,7 @@ TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleRange) bool result = false; gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( - this->exec, start_ends, &result); + this->exec, start_ends, result); ASSERT_TRUE(result); } @@ -217,7 +217,7 @@ TYPED_TEST(PartitionHelpers, CanCheckConsecutiveRangesWithSingleElement) bool result = false; gko::kernels::EXEC_NAMESPACE::partition_helpers::check_consecutive_ranges( - this->exec, start_ends, &result); + this->exec, start_ends, result); ASSERT_TRUE(result); } From 2bb5a12e4a5dc364b10b5f0d665598ca49659827 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 10 Aug 2023 08:35:41 +0200 Subject: [PATCH 234/583] use custom stream for thrust policy --- common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc | 4 ++-- cuda/distributed/partition_helpers_kernels.cu | 3 +++ hip/distributed/partition_helpers_kernels.hip.cpp | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc b/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc index e3e8335dd22..f92794ec138 100644 --- a/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc +++ b/common/cuda_hip/distributed/partition_helpers_kernels.hpp.inc @@ -46,8 +46,8 @@ void sort_by_range_start( range_start_ends.get_data() + 1, strided_indices); auto zip_it = thrust::make_zip_iterator( thrust::make_tuple(end_it, part_ids.get_data())); - thrust::stable_sort_by_key(thrust::device, start_it, start_it + num_ranges, - zip_it); + thrust::stable_sort_by_key(thrust_policy(exec), start_it, + start_it + num_ranges, zip_it); } GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE( diff --git a/cuda/distributed/partition_helpers_kernels.cu b/cuda/distributed/partition_helpers_kernels.cu index a70e728f845..62dad1efaf1 100644 --- a/cuda/distributed/partition_helpers_kernels.cu +++ 
b/cuda/distributed/partition_helpers_kernels.cu @@ -39,6 +39,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "cuda/base/thrust.cuh" + + namespace gko { namespace kernels { namespace cuda { diff --git a/hip/distributed/partition_helpers_kernels.hip.cpp b/hip/distributed/partition_helpers_kernels.hip.cpp index d9ae663f93f..d4769141676 100644 --- a/hip/distributed/partition_helpers_kernels.hip.cpp +++ b/hip/distributed/partition_helpers_kernels.hip.cpp @@ -39,6 +39,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "hip/base/thrust.hip.hpp" + + namespace gko { namespace kernels { namespace hip { From 755827546c4fb3e162f4216af704b641f0e74012 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 17 Aug 2023 10:01:09 +0200 Subject: [PATCH 235/583] correctly define permutation map --- dpcpp/distributed/partition_helpers_kernels.dp.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/dpcpp/distributed/partition_helpers_kernels.dp.cpp b/dpcpp/distributed/partition_helpers_kernels.dp.cpp index 46f72c8ef58..8b0171cd349 100644 --- a/dpcpp/distributed/partition_helpers_kernels.dp.cpp +++ b/dpcpp/distributed/partition_helpers_kernels.dp.cpp @@ -46,7 +46,14 @@ namespace dpcpp { namespace partition_helpers { struct stride { - // Some version requires [] while some requires (), so I added both +#if ONEDPL_VERSION_MAJOR >= 2022 && ONEDPL_VERSION_MINOR >= 1 + template + Index operator()(const Index& i) const + { + return i * 2; + } +#else + // Some older version require [] while some require (), so I added both template Index operator[](const Index& i) const { @@ -56,8 +63,9 @@ struct stride { template Index operator()(const Index& i) const { - return operator[](i); + return i * 2; } +#endif }; template From 20a8215ef4444ee80fe701f9d66cc8b13735a265 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 14 Aug 2023 10:38:52 +0200 Subject: [PATCH 236/583] allow passing matrix filename as benchmark input --- benchmark/blas/blas.cpp | 2 ++ benchmark/blas/distributed/multi_vector.cpp | 2 ++ .../matrix_generator/matrix_generator.cpp | 2 ++ benchmark/solver/distributed/solver.cpp | 2 ++ benchmark/solver/solver.cpp | 2 ++ benchmark/utils/general.hpp | 19 ++++++++++++++++++- 6 files changed, 28 insertions(+), 1 deletion(-) diff --git a/benchmark/blas/blas.cpp b/benchmark/blas/blas.cpp index 11228ed5818..ce3a98a9a68 100644 --- a/benchmark/blas/blas.cpp +++ b/benchmark/blas/blas.cpp @@ -131,6 +131,8 @@ Parameters for a benchmark case are: stride_C: stride for C matrix in gemm (optional, default m) )"; std::string format = example_config; + // this benchmark doesn't use input matrices + matrix_input = false; initialize_argument_parsing(&argc, &argv, header, format); std::string extra_information = diff --git a/benchmark/blas/distributed/multi_vector.cpp b/benchmark/blas/distributed/multi_vector.cpp index be326b08b96..bcf77594c4b 100644 --- a/benchmark/blas/distributed/multi_vector.cpp +++ b/benchmark/blas/distributed/multi_vector.cpp @@ -61,6 +61,8 @@ Parameters for a benchmark case are: stride_y: stride for in/out vector y (optional, default r) )"; std::string format = example_config; + // this benchmark doesn't use input matrices + matrix_input = false; initialize_argument_parsing(&argc, &argv, header, format); const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); diff --git a/benchmark/matrix_generator/matrix_generator.cpp b/benchmark/matrix_generator/matrix_generator.cpp index 
138b5a9c2ce..94883f5f63d 100644 --- a/benchmark/matrix_generator/matrix_generator.cpp +++ b/benchmark/matrix_generator/matrix_generator.cpp @@ -127,6 +127,8 @@ int main(int argc, char* argv[]) std::string header = "A utility that generates various types of " "matrices.\n"; + // this benchmark doesn't use input matrices + matrix_input = false; initialize_argument_parsing(&argc, &argv, header, input_format); std::clog << gko::version_info::get() << std::endl; diff --git a/benchmark/solver/distributed/solver.cpp b/benchmark/solver/distributed/solver.cpp index 2db71c16ca3..4f583bcd8a8 100644 --- a/benchmark/solver/distributed/solver.cpp +++ b/benchmark/solver/distributed/solver.cpp @@ -98,6 +98,8 @@ int main(int argc, char* argv[]) "-", where both "local_format" and "non_local_format" can be any of the recognized spmv formats. )"; + // this benchmark needs an additional "optimal" object in the input + matrix_input_additional_json = ",\"optimal\":{\"spmv\":\"csr-csr\"}"; initialize_argument_parsing(&argc, &argv, header, format); const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 9190c99dad0..c5010116dea 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -61,6 +61,8 @@ int main(int argc, char* argv[]) std::string format = example_config + R"( "optimal":"spmv" can be one of the recognized spmv formats )"; + // this benchmark needs an additional "optimal" object in the input + matrix_input_additional_json = ",\"optimal\":{\"spmv\":\"csr\"}"; initialize_argument_parsing(&argc, &argv, header, format); std::stringstream ss_rel_res_goal; diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 335ed687002..d1b39263468 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -95,7 +95,9 @@ DEFINE_string(double_buffer, "", DEFINE_string( input, "", "If set, the value is used as the input for the benchmark (if set to a " - "json string ending with ]) or as input file path (otherwise)."); + "json string ending with ]), as the \"filename\" of a generated JSON input " + "(if the variable points to a MatrixMarket or Ginkgo binary matrix file) " + "or as JSON input file path (otherwise)."); DEFINE_bool(detailed, true, "If set, performs several runs to obtain more detailed results"); @@ -297,6 +299,12 @@ std::vector split(const std::string& s, char delimiter = ',') } +// allow matrix files as -input value +bool matrix_input = true; +// additional JSON to append to the input_str if the input file is a matrix file +std::string matrix_input_additional_json = ""; + + // returns the stream to be used as input of the application std::istream& get_input_stream() { @@ -308,6 +316,15 @@ std::istream& get_input_stream() if (input_str.back() == ']') { return std::make_unique(input_str); } + if (matrix_input) { + auto first_char = std::ifstream{input_str}.peek(); + // if the input looks like a MatrixMarket or Ginkgo binary file + if (first_char == '%' || first_char == 'G') { + input_str = "[{\"filename\":\"" + input_str + "\"" + + matrix_input_additional_json + "}]"; + return std::make_unique(input_str); + } + } return std::make_unique(input_str); }(); if (stream) { From 56672f8a651e8defa9ce6ecf5eac778564d64da6 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 16 Aug 2023 13:33:24 +0200 Subject: [PATCH 237/583] add `-input_matrix` flag --- benchmark/blas/blas.cpp | 2 - benchmark/blas/distributed/multi_vector.cpp | 2 - 
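// Example of the -input shortcut added above (file and binary names are
// illustrative): invoking the solver benchmark as
//     ./solver -input my_matrix.mtx
// peeks at the file's first character ('%' for MatrixMarket, 'G' for Ginkgo
// binary) and expands the flag into the equivalent JSON input
//     [{"filename":"my_matrix.mtx","optimal":{"spmv":"csr"}}]
// using the benchmark-specific matrix_input_additional_json suffix, so matrix
// files can be passed directly instead of hand-written JSON.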
benchmark/conversions/conversions.cpp | 4 +- .../matrix_generator/matrix_generator.cpp | 2 - .../matrix_statistics/matrix_statistics.cpp | 4 +- benchmark/preconditioner/preconditioner.cpp | 4 +- benchmark/solver/distributed/solver.cpp | 8 +-- benchmark/solver/solver.cpp | 8 +-- benchmark/sparse_blas/sparse_blas.cpp | 4 +- benchmark/spmv/distributed/spmv.cpp | 4 +- benchmark/spmv/spmv.cpp | 4 +- benchmark/utils/general.hpp | 44 ++++-------- benchmark/utils/general_matrix.hpp | 72 +++++++++++++++++++ 13 files changed, 106 insertions(+), 56 deletions(-) create mode 100644 benchmark/utils/general_matrix.hpp diff --git a/benchmark/blas/blas.cpp b/benchmark/blas/blas.cpp index ce3a98a9a68..11228ed5818 100644 --- a/benchmark/blas/blas.cpp +++ b/benchmark/blas/blas.cpp @@ -131,8 +131,6 @@ Parameters for a benchmark case are: stride_C: stride for C matrix in gemm (optional, default m) )"; std::string format = example_config; - // this benchmark doesn't use input matrices - matrix_input = false; initialize_argument_parsing(&argc, &argv, header, format); std::string extra_information = diff --git a/benchmark/blas/distributed/multi_vector.cpp b/benchmark/blas/distributed/multi_vector.cpp index bcf77594c4b..be326b08b96 100644 --- a/benchmark/blas/distributed/multi_vector.cpp +++ b/benchmark/blas/distributed/multi_vector.cpp @@ -61,8 +61,6 @@ Parameters for a benchmark case are: stride_y: stride for in/out vector y (optional, default r) )"; std::string format = example_config; - // this benchmark doesn't use input matrices - matrix_input = false; initialize_argument_parsing(&argc, &argv, header, format); const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp index b249293116b..d9684321e2d 100644 --- a/benchmark/conversions/conversions.cpp +++ b/benchmark/conversions/conversions.cpp @@ -44,7 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/utils/formats.hpp" -#include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/spmv_validation.hpp" #include "benchmark/utils/timer.hpp" @@ -115,7 +115,7 @@ int main(int argc, char* argv[]) std::string header = "A benchmark for measuring performance of Ginkgo's conversions.\n"; std::string format_str = example_config; - initialize_argument_parsing(&argc, &argv, header, format_str); + initialize_argument_parsing_matrix(&argc, &argv, header, format_str); std::string extra_information = std::string() + "The formats are " + FLAGS_formats + "\n"; diff --git a/benchmark/matrix_generator/matrix_generator.cpp b/benchmark/matrix_generator/matrix_generator.cpp index 94883f5f63d..138b5a9c2ce 100644 --- a/benchmark/matrix_generator/matrix_generator.cpp +++ b/benchmark/matrix_generator/matrix_generator.cpp @@ -127,8 +127,6 @@ int main(int argc, char* argv[]) std::string header = "A utility that generates various types of " "matrices.\n"; - // this benchmark doesn't use input matrices - matrix_input = false; initialize_argument_parsing(&argc, &argv, header, input_format); std::clog << gko::version_info::get() << std::endl; diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index 09cae6a7554..fccf4391ad5 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/spmv_validation.hpp" #include "benchmark/utils/types.hpp" @@ -173,7 +173,7 @@ int main(int argc, char* argv[]) "A utility that collects additional statistical properties of the " "matrix.\n"; std::string format = example_config; - initialize_argument_parsing(&argc, &argv, header, format); + initialize_argument_parsing_matrix(&argc, &argv, header, format); std::clog << gko::version_info::get() << std::endl; diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index d125b46bb34..e7859e992dc 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" -#include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/preconditioners.hpp" @@ -262,7 +262,7 @@ int main(int argc, char* argv[]) std::string header = "A benchmark for measuring preconditioner performance.\n"; std::string format = example_config; - initialize_argument_parsing(&argc, &argv, header, format); + initialize_argument_parsing_matrix(&argc, &argv, header, format); std::string extra_information = "Running with preconditioners: " + FLAGS_preconditioners + "\n"; diff --git a/benchmark/solver/distributed/solver.cpp b/benchmark/solver/distributed/solver.cpp index 4f583bcd8a8..8b285e343ce 100644 --- a/benchmark/solver/distributed/solver.cpp +++ b/benchmark/solver/distributed/solver.cpp @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/solver/solver_common.hpp" -#include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" @@ -98,9 +98,9 @@ int main(int argc, char* argv[]) "-", where both "local_format" and "non_local_format" can be any of the recognized spmv formats. )"; - // this benchmark needs an additional "optimal" object in the input - matrix_input_additional_json = ",\"optimal\":{\"spmv\":\"csr-csr\"}"; - initialize_argument_parsing(&argc, &argv, header, format); + std::string additional_json = ",\"optimal\":{\"spmv\":\"csr-csr\"}"; + initialize_argument_parsing_matrix(&argc, &argv, header, format, + additional_json); const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); const auto rank = comm.rank(); diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index c5010116dea..910bb54d89a 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -47,7 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/solver/solver_common.hpp" -#include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" @@ -61,9 +61,9 @@ int main(int argc, char* argv[]) std::string format = example_config + R"( "optimal":"spmv" can be one of the recognized spmv formats )"; - // this benchmark needs an additional "optimal" object in the input - matrix_input_additional_json = ",\"optimal\":{\"spmv\":\"csr\"}"; - initialize_argument_parsing(&argc, &argv, header, format); + std::string additional_json = ",\"optimal\":{\"spmv\":\"csr\"}"; + initialize_argument_parsing_matrix(&argc, &argv, header, format, + additional_json); std::stringstream ss_rel_res_goal; ss_rel_res_goal << std::scientific << FLAGS_rel_res_goal; diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index d906e9f9e12..8c054709fdf 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -45,7 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/sparse_blas/operations.hpp" -#include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/spmv_validation.hpp" #include "benchmark/utils/types.hpp" @@ -161,7 +161,7 @@ int main(int argc, char* argv[]) "A benchmark for measuring performance of Ginkgo's sparse BLAS " "operations.\n"; std::string format = example_config; - initialize_argument_parsing(&argc, &argv, header, format); + initialize_argument_parsing_matrix(&argc, &argv, header, format); auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); diff --git a/benchmark/spmv/distributed/spmv.cpp b/benchmark/spmv/distributed/spmv.cpp index 3c2986846b3..9b7e4ad8c8f 100644 --- a/benchmark/spmv/distributed/spmv.cpp +++ b/benchmark/spmv/distributed/spmv.cpp @@ -44,7 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/spmv/spmv_common.hpp" -#include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" @@ -102,7 +102,7 @@ int main(int argc, char* argv[]) std::string header = "A benchmark for measuring performance of Ginkgo's spmv.\n"; std::string format = example_config; - initialize_argument_parsing(&argc, &argv, header, format); + initialize_argument_parsing_matrix(&argc, &argv, header, format); if (rank == 0) { std::string extra_information = "The formats are [" + diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp index df000cecd47..034437907c8 100644 --- a/benchmark/spmv/spmv.cpp +++ b/benchmark/spmv/spmv.cpp @@ -39,7 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/spmv/spmv_common.hpp" #include "benchmark/utils/formats.hpp" -#include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/spmv_validation.hpp" @@ -64,7 +64,7 @@ int main(int argc, char* argv[]) std::string header = "A benchmark for measuring performance of Ginkgo's spmv.\n"; std::string format = example_config; - initialize_argument_parsing(&argc, &argv, header, format); + initialize_argument_parsing_matrix(&argc, &argv, header, format); std::string extra_information = "The formats are " + FLAGS_formats + "\nThe number of right hand sides is " + diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index d1b39263468..b7ec0e72cf1 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -95,9 +95,7 @@ DEFINE_string(double_buffer, "", DEFINE_string( input, "", "If set, the value is used as the input for the benchmark (if set to a " - "json string ending with ]), as the \"filename\" of a generated JSON input " - "(if the variable points to a MatrixMarket or Ginkgo binary matrix file) " - "or as JSON input file path (otherwise)."); + "json string ending with ]) or as input file path (otherwise)."); DEFINE_bool(detailed, true, "If set, performs several runs to obtain more detailed results"); @@ -147,6 +145,9 @@ DEFINE_double( "is lower than or equal to 1, the timing region is always 1 repetition."); +std::unique_ptr input_stream; + + /** * Parses arguments through gflags and initialize a documentation string. 
* @@ -186,6 +187,14 @@ void initialize_argument_parsing(int* argc, char** argv[], std::string& header, FLAGS_profiler_hook = "auto"; } } + std::string input_str(FLAGS_input); + if (!input_str.empty()) { + if (input_str.back() == ']') { + input_stream = std::make_unique(input_str); + } else { + input_stream = std::make_unique(input_str); + } + } } /** @@ -299,36 +308,11 @@ std::vector split(const std::string& s, char delimiter = ',') } -// allow matrix files as -input value -bool matrix_input = true; -// additional JSON to append to the input_str if the input file is a matrix file -std::string matrix_input_additional_json = ""; - - // returns the stream to be used as input of the application std::istream& get_input_stream() { - static auto stream = []() -> std::unique_ptr { - std::string input_str(FLAGS_input); - if (input_str.empty()) { - return nullptr; - } - if (input_str.back() == ']') { - return std::make_unique(input_str); - } - if (matrix_input) { - auto first_char = std::ifstream{input_str}.peek(); - // if the input looks like a MatrixMarket or Ginkgo binary file - if (first_char == '%' || first_char == 'G') { - input_str = "[{\"filename\":\"" + input_str + "\"" + - matrix_input_additional_json + "}]"; - return std::make_unique(input_str); - } - } - return std::make_unique(input_str); - }(); - if (stream) { - return *stream; + if (input_stream) { + return *input_stream; } return std::cin; } diff --git a/benchmark/utils/general_matrix.hpp b/benchmark/utils/general_matrix.hpp new file mode 100644 index 00000000000..3791976e3ab --- /dev/null +++ b/benchmark/utils/general_matrix.hpp @@ -0,0 +1,72 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_GENERAL_MATRIX_HPP_ +#define GKO_BENCHMARK_UTILS_GENERAL_MATRIX_HPP_ + + +#include + + +#include + + +#include "benchmark/utils/general.hpp" + + +DEFINE_string(input_matrix, "", + "Filename of a matrix to be used as the single input. 
Overwrites " + "the value of the -input flag"); + + +/** + * @copydoc initialize_argument_parsing + * @param additional_matrix_file_json text to be appended to the + * `{"filename":"..."}` JSON object that + * will be used as input for the benchmark + * if the `-input_matrix` flag is used. + */ +void initialize_argument_parsing_matrix( + int* argc, char** argv[], std::string& header, std::string& format, + std::string additional_matrix_file_json = "") +{ + initialize_argument_parsing(argc, argv, header, format); + std::string input_matrix_str{FLAGS_input_matrix}; + if (!input_matrix_str.empty()) { + auto input_json = "[{\"filename\":\"" + input_matrix_str + "\"" + + additional_matrix_file_json + "}]"; + input_stream = std::make_unique(input_json); + } +} + + +#endif // GKO_BENCHMARK_UTILS_GENERAL_MATRIX_HPP_ \ No newline at end of file From eb58d6cb6650d15b1e25300fc21fa58ffa131496 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 16 Aug 2023 13:33:51 +0200 Subject: [PATCH 238/583] test `-input_matrix` flag --- benchmark/test/conversion.py | 12 ++++ benchmark/test/matrix_statistics.py | 7 +++ benchmark/test/preconditioner.py | 7 +++ .../test/reference/conversion.matrix.stderr | 46 +++++++++++++++ .../test/reference/conversion.matrix.stdout | 19 +++++++ .../distributed_solver.matrix.stderr | 17 ++++++ .../distributed_solver.matrix.stdout | 57 +++++++++++++++++++ .../reference/matrix_statistics.matrix.stderr | 8 +++ .../reference/matrix_statistics.matrix.stdout | 38 +++++++++++++ .../reference/preconditioner.matrix.stderr | 42 ++++++++++++++ .../reference/preconditioner.matrix.stdout | 30 ++++++++++ benchmark/test/reference/solver.matrix.stderr | 17 ++++++ benchmark/test/reference/solver.matrix.stdout | 55 ++++++++++++++++++ .../test/reference/sparse_blas.matrix.stderr | 36 ++++++++++++ .../test/reference/sparse_blas.matrix.stdout | 25 ++++++++ benchmark/test/reference/spmv.matrix.stderr | 31 ++++++++++ benchmark/test/reference/spmv.matrix.stdout | 20 +++++++ benchmark/test/solver.py | 7 +++ benchmark/test/solver_distributed.py | 7 +++ benchmark/test/sparse_blas.py | 12 ++++ benchmark/test/spmv.py | 7 +++ benchmark/test/test_framework.py.in | 12 ++-- 22 files changed, 505 insertions(+), 7 deletions(-) create mode 100644 benchmark/test/reference/conversion.matrix.stderr create mode 100644 benchmark/test/reference/conversion.matrix.stdout create mode 100644 benchmark/test/reference/distributed_solver.matrix.stderr create mode 100644 benchmark/test/reference/distributed_solver.matrix.stdout create mode 100644 benchmark/test/reference/matrix_statistics.matrix.stderr create mode 100644 benchmark/test/reference/matrix_statistics.matrix.stdout create mode 100644 benchmark/test/reference/preconditioner.matrix.stderr create mode 100644 benchmark/test/reference/preconditioner.matrix.stdout create mode 100644 benchmark/test/reference/solver.matrix.stderr create mode 100644 benchmark/test/reference/solver.matrix.stdout create mode 100644 benchmark/test/reference/sparse_blas.matrix.stderr create mode 100644 benchmark/test/reference/sparse_blas.matrix.stdout create mode 100644 benchmark/test/reference/spmv.matrix.stderr create mode 100644 benchmark/test/reference/spmv.matrix.stdout diff --git a/benchmark/test/conversion.py b/benchmark/test/conversion.py index cf2e33983af..2eada100731 100755 --- a/benchmark/test/conversion.py +++ b/benchmark/test/conversion.py @@ -29,6 +29,18 @@ expected_stderr="conversion.simple.stderr", ) +# input matrixfile +test_framework.compare_output( + [ + "-input_matrix", + 
str(test_framework.matrixpath), + "-formats", + "coo,csr", + ], + expected_stdout="conversion.matrix.stdout", + expected_stderr="conversion.matrix.stderr", +) + # check that all conversions work test_framework.compare_output( [ diff --git a/benchmark/test/matrix_statistics.py b/benchmark/test/matrix_statistics.py index a29c80a0a7a..6e4d8b1d2f5 100755 --- a/benchmark/test/matrix_statistics.py +++ b/benchmark/test/matrix_statistics.py @@ -23,3 +23,10 @@ expected_stdout="matrix_statistics.simple.stdout", expected_stderr="matrix_statistics.simple.stderr", ) + +# input matrix file +test_framework.compare_output( + ["-input_matrix", str(test_framework.matrixpath)], + expected_stdout="matrix_statistics.matrix.stdout", + expected_stderr="matrix_statistics.matrix.stderr", +) diff --git a/benchmark/test/preconditioner.py b/benchmark/test/preconditioner.py index a5a8dd3f13f..e05e5b780ac 100755 --- a/benchmark/test/preconditioner.py +++ b/benchmark/test/preconditioner.py @@ -24,6 +24,13 @@ expected_stderr="preconditioner.simple.stderr", ) +# input matrix file +test_framework.compare_output( + ["-input_matrix", str(test_framework.matrixpath)], + expected_stdout="preconditioner.matrix.stdout", + expected_stderr="preconditioner.matrix.stderr", +) + # profiler annotations test_framework.compare_output( [ diff --git a/benchmark/test/reference/conversion.matrix.stderr b/benchmark/test/reference/conversion.matrix.stderr new file mode 100644 index 00000000000..813e04a2100 --- /dev/null +++ b/benchmark/test/reference/conversion.matrix.stderr @@ -0,0 +1,46 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are coo,csr +Benchmarking conversions. 
+Running test case +{ + "filename": "../../matrices/test/ani1.mtx", + "conversions": {} +} +Matrix is of size (36, 36) +Current state: +[ + { + "filename": "../../matrices/test/ani1.mtx", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "size": 36 + } +] +Current state: +[ + { + "filename": "../../matrices/test/ani1.mtx", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "size": 36 + } +] diff --git a/benchmark/test/reference/conversion.matrix.stdout b/benchmark/test/reference/conversion.matrix.stdout new file mode 100644 index 00000000000..d3f62ec2b56 --- /dev/null +++ b/benchmark/test/reference/conversion.matrix.stdout @@ -0,0 +1,19 @@ + +[ + { + "filename": "../../matrices/test/ani1.mtx", + "conversions": { + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr-coo": { + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "size": 36 + } +] diff --git a/benchmark/test/reference/distributed_solver.matrix.stderr b/benchmark/test/reference/distributed_solver.matrix.stderr new file mode 100644 index 00000000000..7eba2fc85dd --- /dev/null +++ b/benchmark/test/reference/distributed_solver.matrix.stderr @@ -0,0 +1,17 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running cg with 1000 iterations and residual goal of 1.000000e-06 +The number of right hand sides is 1 +Running test case +{ + "filename": "../../matrices/test/ani1.mtx", + "optimal": { + "spmv": "csr-csr" + }, + "solver": {} +} +Matrix is of size (36, 36) + Running solver: cg diff --git a/benchmark/test/reference/distributed_solver.matrix.stdout b/benchmark/test/reference/distributed_solver.matrix.stdout new file mode 100644 index 00000000000..157c40fdccf --- /dev/null +++ b/benchmark/test/reference/distributed_solver.matrix.stdout @@ -0,0 +1,57 @@ + +[ + { + "filename": "../../matrices/test/ani1.mtx", + "optimal": { + "spmv": "csr-csr" + }, + "solver": { + "cg": { + "recurrent_residuals": [], + "true_residuals": [], + "implicit_residuals": [], + "iteration_timestamps": [], + "rhs_norm": 1.0, + "generate": { + "components": { + "generate()": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "time": 1.0 + }, + "apply": { + "components": { + "apply()": 1.0, + "iteration": 1.0, + "allocate": 1.0, + "dense::fill": 1.0, + "cg::initialize": 1.0, + "advanced_apply()": 1.0, + "dense::row_gather": 1.0, + "csr::advanced_spmv": 1.0, + "dense::compute_squared_norm2": 1.0, + "dense::compute_sqrt": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "dense::compute_conj_dot_dispatch": 1.0, + "check()": 1.0, + "residual_norm::residual_norm": 1.0, + "cg::step_1": 1.0, + "csr::spmv": 1.0, + "cg::step_2": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "iterations": 27, + "time": 1.0 + }, + "preconditioner": {}, + "residual_norm": 1.0, + "repetitions": 1, + "completed": true + } + }, + "size": 36 + } +] diff --git a/benchmark/test/reference/matrix_statistics.matrix.stderr b/benchmark/test/reference/matrix_statistics.matrix.stderr new file mode 100644 index 00000000000..0f7cc261c47 --- /dev/null +++ b/benchmark/test/reference/matrix_statistics.matrix.stderr @@ -0,0 +1,8 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running test case +{ + "filename": 
"../../matrices/test/ani1.mtx", + "problem": {} +} +Matrix is of size (36, 36) diff --git a/benchmark/test/reference/matrix_statistics.matrix.stdout b/benchmark/test/reference/matrix_statistics.matrix.stdout new file mode 100644 index 00000000000..20657f7abd5 --- /dev/null +++ b/benchmark/test/reference/matrix_statistics.matrix.stdout @@ -0,0 +1,38 @@ + +[ + { + "filename": "../../matrices/test/ani1.mtx", + "problem": { + "rows": 36, + "columns": 36, + "nonzeros": 208, + "row_distribution": { + "min": 4, + "q1": 4.5, + "median": 6.0, + "q3": 7.0, + "max": 9, + "mean": 5.777777777777778, + "variance": 2.061728395061728, + "skewness": 0.3366362745126052, + "kurtosis": 2.0507009932231366, + "hyperskewness": 1.9165991338199193, + "hyperflatness": 6.0545648993883665 + }, + "col_distribution": { + "min": 4, + "q1": 4.5, + "median": 6.0, + "q3": 7.0, + "max": 9, + "mean": 5.777777777777778, + "variance": 2.061728395061728, + "skewness": 0.3366362745126052, + "kurtosis": 2.0507009932231366, + "hyperskewness": 1.9165991338199193, + "hyperflatness": 6.0545648993883665 + } + }, + "size": 36 + } +] diff --git a/benchmark/test/reference/preconditioner.matrix.stderr b/benchmark/test/reference/preconditioner.matrix.stderr new file mode 100644 index 00000000000..d54c99ac971 --- /dev/null +++ b/benchmark/test/reference/preconditioner.matrix.stderr @@ -0,0 +1,42 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +Running with preconditioners: none +Running test case +{ + "filename": "../../matrices/test/ani1.mtx", + "preconditioner": {} +} +Matrix is of size (36, 36) +Current state: +[ + { + "filename": "../../matrices/test/ani1.mtx", + "preconditioner": { + "none": { + "generate": { + "components": { + "generate()": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "apply": { + "components": { + "apply()": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "completed": true + } + }, + "size": 36 + } +] diff --git a/benchmark/test/reference/preconditioner.matrix.stdout b/benchmark/test/reference/preconditioner.matrix.stdout new file mode 100644 index 00000000000..22c0c9cc1c1 --- /dev/null +++ b/benchmark/test/reference/preconditioner.matrix.stdout @@ -0,0 +1,30 @@ + +[ + { + "filename": "../../matrices/test/ani1.mtx", + "preconditioner": { + "none": { + "generate": { + "components": { + "generate()": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "apply": { + "components": { + "apply()": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "completed": true + } + }, + "size": 36 + } +] diff --git a/benchmark/test/reference/solver.matrix.stderr b/benchmark/test/reference/solver.matrix.stderr new file mode 100644 index 00000000000..78b75c301f7 --- /dev/null +++ b/benchmark/test/reference/solver.matrix.stderr @@ -0,0 +1,17 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running cg with 1000 iterations and residual goal of 1.000000e-06 +The number of right hand sides is 1 +Running test case +{ + "filename": "../../matrices/test/ani1.mtx", + "optimal": { + "spmv": "csr" + }, + "solver": {} +} +Matrix is of size (36, 36) + Running solver: cg diff --git 
a/benchmark/test/reference/solver.matrix.stdout b/benchmark/test/reference/solver.matrix.stdout new file mode 100644 index 00000000000..4a68d8a599b --- /dev/null +++ b/benchmark/test/reference/solver.matrix.stdout @@ -0,0 +1,55 @@ + +[ + { + "filename": "../../matrices/test/ani1.mtx", + "optimal": { + "spmv": "csr" + }, + "solver": { + "cg": { + "recurrent_residuals": [], + "true_residuals": [], + "implicit_residuals": [], + "iteration_timestamps": [], + "rhs_norm": 1.0, + "generate": { + "components": { + "generate()": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "time": 1.0 + }, + "apply": { + "components": { + "apply()": 1.0, + "iteration": 1.0, + "allocate": 1.0, + "dense::fill": 1.0, + "cg::initialize": 1.0, + "advanced_apply()": 1.0, + "csr::advanced_spmv": 1.0, + "dense::compute_norm2_dispatch": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "dense::compute_conj_dot_dispatch": 1.0, + "check()": 1.0, + "residual_norm::residual_norm": 1.0, + "cg::step_1": 1.0, + "csr::spmv": 1.0, + "cg::step_2": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "iterations": 27, + "time": 1.0 + }, + "preconditioner": {}, + "residual_norm": 1.0, + "repetitions": 1, + "completed": true + } + }, + "size": 36 + } +] diff --git a/benchmark/test/reference/sparse_blas.matrix.stderr b/benchmark/test/reference/sparse_blas.matrix.stderr new file mode 100644 index 00000000000..404a761aec9 --- /dev/null +++ b/benchmark/test/reference/sparse_blas.matrix.stderr @@ -0,0 +1,36 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are transposeRunning test case +{ + "filename": "../../matrices/test/ani1.mtx", + "sparse_blas": {} +} +Matrix is of size (36, 36), 208 +Current state: +[ + { + "filename": "../../matrices/test/ani1.mtx", + "sparse_blas": { + "transpose": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "components": { + "allocate": 1.0, + "components::fill_array": 1.0, + "csr::transpose": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "completed": true + } + }, + "rows": 36, + "cols": 36, + "nonzeros": 208 + } +] diff --git a/benchmark/test/reference/sparse_blas.matrix.stdout b/benchmark/test/reference/sparse_blas.matrix.stdout new file mode 100644 index 00000000000..ae983436081 --- /dev/null +++ b/benchmark/test/reference/sparse_blas.matrix.stdout @@ -0,0 +1,25 @@ + +[ + { + "filename": "../../matrices/test/ani1.mtx", + "sparse_blas": { + "transpose": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "components": { + "allocate": 1.0, + "components::fill_array": 1.0, + "csr::transpose": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "completed": true + } + }, + "rows": 36, + "cols": 36, + "nonzeros": 208 + } +] diff --git a/benchmark/test/reference/spmv.matrix.stderr b/benchmark/test/reference/spmv.matrix.stderr new file mode 100644 index 00000000000..1096d64c74e --- /dev/null +++ b/benchmark/test/reference/spmv.matrix.stderr @@ -0,0 +1,31 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are coo +The number of right hand sides is 1 +Running test case +{ + "filename": "../../matrices/test/ani1.mtx", + "spmv": {} +} +Matrix is of size (36, 36) +Current state: +[ + { + "filename": "../../matrices/test/ani1.mtx", + "spmv": { + "coo": { + 
"storage": 3328, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "size": 36, + "nnz": 208, + "optimal": {} + } +] diff --git a/benchmark/test/reference/spmv.matrix.stdout b/benchmark/test/reference/spmv.matrix.stdout new file mode 100644 index 00000000000..b51f331f918 --- /dev/null +++ b/benchmark/test/reference/spmv.matrix.stdout @@ -0,0 +1,20 @@ + +[ + { + "filename": "../../matrices/test/ani1.mtx", + "spmv": { + "coo": { + "storage": 3328, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "size": 36, + "nnz": 208, + "optimal": { + "spmv": "coo" + } + } +] diff --git a/benchmark/test/solver.py b/benchmark/test/solver.py index e974f849276..025ee92707c 100755 --- a/benchmark/test/solver.py +++ b/benchmark/test/solver.py @@ -24,6 +24,13 @@ expected_stderr="solver.simple.stderr", ) +# input matrix file +test_framework.compare_output( + ["-input_matrix", str(test_framework.matrixpath)], + expected_stdout="solver.matrix.stdout", + expected_stderr="solver.matrix.stderr", +) + # profiler annotations test_framework.compare_output( [ diff --git a/benchmark/test/solver_distributed.py b/benchmark/test/solver_distributed.py index c6623723a43..54bbb030077 100644 --- a/benchmark/test/solver_distributed.py +++ b/benchmark/test/solver_distributed.py @@ -27,6 +27,13 @@ expected_stderr="distributed_solver.simple.stderr", ) +# input matrix file +test_framework.compare_output( + ["-input_matrix", str(test_framework.matrixpath)], + expected_stdout="distributed_solver.matrix.stdout", + expected_stderr="distributed_solver.matrix.stderr", +) + # profiler annotations test_framework.compare_output( [ diff --git a/benchmark/test/sparse_blas.py b/benchmark/test/sparse_blas.py index 7b0968a710c..724cdb866f0 100755 --- a/benchmark/test/sparse_blas.py +++ b/benchmark/test/sparse_blas.py @@ -29,6 +29,18 @@ expected_stderr="sparse_blas.simple.stderr", ) +# input matrix file +test_framework.compare_output( + [ + "-operations", + "transpose", + "-input_matrix", + str(test_framework.matrixpath), + ], + expected_stdout="sparse_blas.matrix.stdout", + expected_stderr="sparse_blas.matrix.stderr", +) + # profiler annotations (transpose has the smallest number of allocations) test_framework.compare_output( [ diff --git a/benchmark/test/spmv.py b/benchmark/test/spmv.py index 6e2d9f05d49..865f74bb6d0 100755 --- a/benchmark/test/spmv.py +++ b/benchmark/test/spmv.py @@ -24,6 +24,13 @@ expected_stderr="spmv.simple.stderr", ) +# input matrix file +test_framework.compare_output( + ["-input_matrix", str(test_framework.matrixpath)], + expected_stdout="spmv.matrix.stdout", + expected_stderr="spmv.matrix.stderr", +) + # profiler annotations test_framework.compare_output( [ diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 6037f8c594e..1c762905c77 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -9,6 +9,7 @@ import sys sourcepath = pathlib.Path("@CMAKE_CURRENT_SOURCE_DIR@") binpath = pathlib.Path("@PROJECT_BINARY_DIR@") +matrixpath = pathlib.Path("../../matrices/test/ani1.mtx") generate = False if len(sys.argv) > 2 and sys.argv[2] == "--generate": generate = True @@ -99,8 +100,7 @@ def sanitize_json_in_text(lines: List[str]) -> List[str]: for begin, end, do_sanitize in combined_pairs ] reconstructed = [ - json.dumps(sanitize_json(json.loads(t)), - indent=4) if do_sanitize else t + json.dumps(sanitize_json(json.loads(t)), indent=4) if do_sanitize else t for t, 
do_sanitize in texts ] return "\n".join(reconstructed).split("\n") @@ -135,7 +135,7 @@ def determinize_text( break if keep: output_lines.append(line) - if output_lines[-1] != "": + if len(output_lines) == 0 or output_lines[-1] != "": output_lines.append("") try: return sanitize_json_in_text(output_lines) @@ -215,8 +215,7 @@ def compare_output_impl( print("FAIL: stdout differs") print( "\n".join( - difflib.unified_diff( - expected_stdout_processed, result_stdout_processed) + difflib.unified_diff(expected_stdout_processed, result_stdout_processed) ) ) failed = True @@ -224,8 +223,7 @@ def compare_output_impl( print("FAIL: stderr differs") print( "\n".join( - difflib.unified_diff( - expected_stderr_processed, result_stderr_processed) + difflib.unified_diff(expected_stderr_processed, result_stderr_processed) ) ) failed = True From 409bd3fae0a6052132351e7501820ac20f37e85b Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 17 Aug 2023 09:54:59 +0200 Subject: [PATCH 239/583] use absolute path --- benchmark/test/reference/conversion.matrix.stderr | 6 +++--- benchmark/test/reference/conversion.matrix.stdout | 2 +- benchmark/test/reference/distributed_solver.matrix.stderr | 2 +- benchmark/test/reference/distributed_solver.matrix.stdout | 2 +- benchmark/test/reference/matrix_statistics.matrix.stderr | 2 +- benchmark/test/reference/matrix_statistics.matrix.stdout | 2 +- benchmark/test/reference/preconditioner.matrix.stderr | 4 ++-- benchmark/test/reference/preconditioner.matrix.stdout | 2 +- benchmark/test/reference/solver.matrix.stderr | 2 +- benchmark/test/reference/solver.matrix.stdout | 2 +- benchmark/test/reference/sparse_blas.matrix.stderr | 4 ++-- benchmark/test/reference/sparse_blas.matrix.stdout | 2 +- benchmark/test/reference/spmv.matrix.stderr | 4 ++-- benchmark/test/reference/spmv.matrix.stdout | 2 +- benchmark/test/test_framework.py.in | 4 ++-- 15 files changed, 21 insertions(+), 21 deletions(-) diff --git a/benchmark/test/reference/conversion.matrix.stderr b/benchmark/test/reference/conversion.matrix.stderr index 813e04a2100..1d604175479 100644 --- a/benchmark/test/reference/conversion.matrix.stderr +++ b/benchmark/test/reference/conversion.matrix.stderr @@ -7,14 +7,14 @@ The formats are coo,csr Benchmarking conversions. 
Running test case { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "conversions": {} } Matrix is of size (36, 36) Current state: [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "conversions": { "coo-csr": { "time": 1.0, @@ -28,7 +28,7 @@ Current state: Current state: [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "conversions": { "coo-csr": { "time": 1.0, diff --git a/benchmark/test/reference/conversion.matrix.stdout b/benchmark/test/reference/conversion.matrix.stdout index d3f62ec2b56..e43edda0595 100644 --- a/benchmark/test/reference/conversion.matrix.stdout +++ b/benchmark/test/reference/conversion.matrix.stdout @@ -1,7 +1,7 @@ [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "conversions": { "coo-csr": { "time": 1.0, diff --git a/benchmark/test/reference/distributed_solver.matrix.stderr b/benchmark/test/reference/distributed_solver.matrix.stderr index 7eba2fc85dd..4f0c6b22edd 100644 --- a/benchmark/test/reference/distributed_solver.matrix.stderr +++ b/benchmark/test/reference/distributed_solver.matrix.stderr @@ -7,7 +7,7 @@ Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 Running test case { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "optimal": { "spmv": "csr-csr" }, diff --git a/benchmark/test/reference/distributed_solver.matrix.stdout b/benchmark/test/reference/distributed_solver.matrix.stdout index 157c40fdccf..34fdda13e55 100644 --- a/benchmark/test/reference/distributed_solver.matrix.stdout +++ b/benchmark/test/reference/distributed_solver.matrix.stdout @@ -1,7 +1,7 @@ [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "optimal": { "spmv": "csr-csr" }, diff --git a/benchmark/test/reference/matrix_statistics.matrix.stderr b/benchmark/test/reference/matrix_statistics.matrix.stderr index 0f7cc261c47..af205c778c0 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stderr +++ b/benchmark/test/reference/matrix_statistics.matrix.stderr @@ -2,7 +2,7 @@ This is Ginkgo 1.7.0 (develop) running with core module 1.7.0 (develop) Running test case { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "problem": {} } Matrix is of size (36, 36) diff --git a/benchmark/test/reference/matrix_statistics.matrix.stdout b/benchmark/test/reference/matrix_statistics.matrix.stdout index 20657f7abd5..a056241669b 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stdout +++ b/benchmark/test/reference/matrix_statistics.matrix.stdout @@ -1,7 +1,7 @@ [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "problem": { "rows": 36, "columns": 36, diff --git a/benchmark/test/reference/preconditioner.matrix.stderr b/benchmark/test/reference/preconditioner.matrix.stderr index d54c99ac971..c9ef583d79e 100644 --- a/benchmark/test/reference/preconditioner.matrix.stderr +++ b/benchmark/test/reference/preconditioner.matrix.stderr @@ -6,14 +6,14 @@ The random seed for right hand sides is 42 Running with preconditioners: none Running test case { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "preconditioner": {} } Matrix is of size (36, 36) Current state: [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "preconditioner": { "none": { "generate": { diff --git a/benchmark/test/reference/preconditioner.matrix.stdout b/benchmark/test/reference/preconditioner.matrix.stdout index 22c0c9cc1c1..77979f4c54b 100644 --- a/benchmark/test/reference/preconditioner.matrix.stdout +++ 
b/benchmark/test/reference/preconditioner.matrix.stdout @@ -1,7 +1,7 @@ [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "preconditioner": { "none": { "generate": { diff --git a/benchmark/test/reference/solver.matrix.stderr b/benchmark/test/reference/solver.matrix.stderr index 78b75c301f7..8a1ea117314 100644 --- a/benchmark/test/reference/solver.matrix.stderr +++ b/benchmark/test/reference/solver.matrix.stderr @@ -7,7 +7,7 @@ Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 Running test case { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "optimal": { "spmv": "csr" }, diff --git a/benchmark/test/reference/solver.matrix.stdout b/benchmark/test/reference/solver.matrix.stdout index 4a68d8a599b..6a1f8ceb959 100644 --- a/benchmark/test/reference/solver.matrix.stdout +++ b/benchmark/test/reference/solver.matrix.stdout @@ -1,7 +1,7 @@ [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "optimal": { "spmv": "csr" }, diff --git a/benchmark/test/reference/sparse_blas.matrix.stderr b/benchmark/test/reference/sparse_blas.matrix.stderr index 404a761aec9..5001c604e72 100644 --- a/benchmark/test/reference/sparse_blas.matrix.stderr +++ b/benchmark/test/reference/sparse_blas.matrix.stderr @@ -5,14 +5,14 @@ Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are transposeRunning test case { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "sparse_blas": {} } Matrix is of size (36, 36), 208 Current state: [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "sparse_blas": { "transpose": { "time": 1.0, diff --git a/benchmark/test/reference/sparse_blas.matrix.stdout b/benchmark/test/reference/sparse_blas.matrix.stdout index ae983436081..4a64c8ea1ce 100644 --- a/benchmark/test/reference/sparse_blas.matrix.stdout +++ b/benchmark/test/reference/sparse_blas.matrix.stdout @@ -1,7 +1,7 @@ [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "sparse_blas": { "transpose": { "time": 1.0, diff --git a/benchmark/test/reference/spmv.matrix.stderr b/benchmark/test/reference/spmv.matrix.stderr index 1096d64c74e..8d942cd0de5 100644 --- a/benchmark/test/reference/spmv.matrix.stderr +++ b/benchmark/test/reference/spmv.matrix.stderr @@ -7,14 +7,14 @@ The formats are coo The number of right hand sides is 1 Running test case { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "spmv": {} } Matrix is of size (36, 36) Current state: [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "spmv": { "coo": { "storage": 3328, diff --git a/benchmark/test/reference/spmv.matrix.stdout b/benchmark/test/reference/spmv.matrix.stdout index b51f331f918..47035c27549 100644 --- a/benchmark/test/reference/spmv.matrix.stdout +++ b/benchmark/test/reference/spmv.matrix.stdout @@ -1,7 +1,7 @@ [ { - "filename": "../../matrices/test/ani1.mtx", + "filename": "", "spmv": { "coo": { "storage": 3328, diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 1c762905c77..da1b0bfd618 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -9,7 +9,7 @@ import sys sourcepath = pathlib.Path("@CMAKE_CURRENT_SOURCE_DIR@") binpath = pathlib.Path("@PROJECT_BINARY_DIR@") -matrixpath = pathlib.Path("../../matrices/test/ani1.mtx") +matrixpath = pathlib.Path("@PROJECT_BINARY_DIR@/matrices/test/ani1.mtx") generate = False if len(sys.argv) > 2 and sys.argv[2] == 
"--generate": generate = True @@ -22,7 +22,7 @@ denumberify_paths = [ "rhs_norm", "max_relative_norm2", ] -empty_string_paths = ["error"] +empty_string_paths = ["error", "filename"] empty_array_paths = [ "recurrent_residuals", "true_residuals", From cc9857693e7f340a7b071d35d3b49b67ae5c5bca Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 17 Aug 2023 10:01:34 +0200 Subject: [PATCH 240/583] review updates * -input and -input_matrix are incompatible * use R-strings for JSON Co-authored-by: Marcel Koch Co-authored-by: Yuhsiang M. Tsai --- benchmark/solver/distributed/solver.cpp | 2 +- benchmark/solver/solver.cpp | 2 +- benchmark/utils/general_matrix.hpp | 9 +++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/benchmark/solver/distributed/solver.cpp b/benchmark/solver/distributed/solver.cpp index 8b285e343ce..a9b1f9c1c93 100644 --- a/benchmark/solver/distributed/solver.cpp +++ b/benchmark/solver/distributed/solver.cpp @@ -98,7 +98,7 @@ int main(int argc, char* argv[]) "-", where both "local_format" and "non_local_format" can be any of the recognized spmv formats. )"; - std::string additional_json = ",\"optimal\":{\"spmv\":\"csr-csr\"}"; + std::string additional_json = R"(,"optimal":{"spmv":"csr-csr"})"; initialize_argument_parsing_matrix(&argc, &argv, header, format, additional_json); diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 910bb54d89a..4efc5558a8e 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -61,7 +61,7 @@ int main(int argc, char* argv[]) std::string format = example_config + R"( "optimal":"spmv" can be one of the recognized spmv formats )"; - std::string additional_json = ",\"optimal\":{\"spmv\":\"csr\"}"; + std::string additional_json = R"(,"optimal":{"spmv":"csr"})"; initialize_argument_parsing_matrix(&argc, &argv, header, format, additional_json); diff --git a/benchmark/utils/general_matrix.hpp b/benchmark/utils/general_matrix.hpp index 3791976e3ab..e499d5d9326 100644 --- a/benchmark/utils/general_matrix.hpp +++ b/benchmark/utils/general_matrix.hpp @@ -62,11 +62,16 @@ void initialize_argument_parsing_matrix( initialize_argument_parsing(argc, argv, header, format); std::string input_matrix_str{FLAGS_input_matrix}; if (!input_matrix_str.empty()) { - auto input_json = "[{\"filename\":\"" + input_matrix_str + "\"" + + if (input_stream) { + std::cerr + << "-input and -input_matrix cannot be used simultaneously\n"; + std::exit(1); + } + auto input_json = R"([{"filename":")" + input_matrix_str + "\"" + additional_matrix_file_json + "}]"; input_stream = std::make_unique(input_json); } } -#endif // GKO_BENCHMARK_UTILS_GENERAL_MATRIX_HPP_ \ No newline at end of file +#endif // GKO_BENCHMARK_UTILS_GENERAL_MATRIX_HPP_ From 62de06bb59f3ac1f186d6db4014bc60436fb27bf Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 17 Aug 2023 11:56:12 +0200 Subject: [PATCH 241/583] escape file path using rapidJSON --- benchmark/utils/general_matrix.hpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/benchmark/utils/general_matrix.hpp b/benchmark/utils/general_matrix.hpp index e499d5d9326..2049dadf45f 100644 --- a/benchmark/utils/general_matrix.hpp +++ b/benchmark/utils/general_matrix.hpp @@ -67,9 +67,17 @@ void initialize_argument_parsing_matrix( << "-input and -input_matrix cannot be used simultaneously\n"; std::exit(1); } - auto input_json = R"([{"filename":")" + input_matrix_str + "\"" + - additional_matrix_file_json + "}]"; - input_stream = std::make_unique(input_json); + // 
create JSON for the filename via RapidJSON to ensure the string is + // correctly escaped + rapidjson::Document d; + auto json_template = + R"([{"filename":"")" + additional_matrix_file_json + "}]"; + d.Parse(json_template.c_str()); + d[0]["filename"].SetString(input_matrix_str.c_str(), d.GetAllocator()); + rapidjson::StringBuffer sb; + rapidjson::PrettyWriter writer(sb); + d.Accept(writer); + input_stream = std::make_unique(sb.GetString()); } } From 7d25218ef72856d8e50611aba585d5403f1b1a13 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Fri, 18 Aug 2023 14:30:21 +0200 Subject: [PATCH 242/583] fix missing $ in CI_COMMIT_TAG --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1cd8c0335f8..94dedd030c6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -25,7 +25,7 @@ include: # [1] https://gitlab.com/gitlab-org/gitlab/-/issues/194023#note_1225906002 - local: '.gitlab/add-interrupt.yml' rules: - - if: $CI_COMMIT_BRANCH != "master" && $CI_COMMIT_BRANCH != "develop" && CI_COMMIT_TAG !~ /^v\d+\.\d+\.\d+/ + - if: $CI_COMMIT_BRANCH != "master" && $CI_COMMIT_BRANCH != "develop" && $CI_COMMIT_TAG !~ /^v\d+\.\d+\.\d+/ sync: stage: sync From e9a54182f20008bd715cf16e47f8b2fa4bfd87e6 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Tue, 22 Aug 2023 23:08:32 +0200 Subject: [PATCH 243/583] fix median case --- benchmark/utils/timer_impl.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmark/utils/timer_impl.hpp b/benchmark/utils/timer_impl.hpp index 888cb496248..a6b9d968713 100644 --- a/benchmark/utils/timer_impl.hpp +++ b/benchmark/utils/timer_impl.hpp @@ -111,7 +111,8 @@ class Timer { return copy.back(); } else if (method == "median") { auto mid = copy.size() / 2; - if (copy.size() % 2) { + if (copy.size() % 2 == 0) { + // contains even elements return (copy.at(mid) + copy.at(mid - 1)) / 2; } else { return copy.at(mid); From 1e1f6dd566adbf6bf2cc1279cf614fcc207a7a49 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 27 Jul 2023 23:45:18 +0200 Subject: [PATCH 244/583] nlohmann_json refactor --- CMakeLists.txt | 2 +- benchmark/CMakeLists.txt | 4 +- benchmark/blas/blas.cpp | 21 +- benchmark/blas/blas_common.hpp | 247 +-- benchmark/blas/distributed/multi_vector.cpp | 30 +- .../CMakeLists.txt | 2 +- benchmark/conversion/conversion.cpp | 194 ++ benchmark/conversions/conversions.cpp | 223 -- .../matrix_generator/matrix_generator.cpp | 36 +- .../matrix_statistics/matrix_statistics.cpp | 183 +- benchmark/preconditioner/preconditioner.cpp | 238 +-- benchmark/solver/distributed/solver.cpp | 32 +- benchmark/solver/solver.cpp | 21 +- benchmark/solver/solver_common.hpp | 406 ++-- benchmark/sparse_blas/operations.cpp | 13 +- benchmark/sparse_blas/operations.hpp | 8 +- benchmark/sparse_blas/sparse_blas.cpp | 191 +- benchmark/spmv/distributed/spmv.cpp | 67 +- benchmark/spmv/spmv.cpp | 33 +- benchmark/spmv/spmv_common.hpp | 289 ++- benchmark/test/reference/blas.profile.stderr | 69 +- benchmark/test/reference/blas.simple.stderr | 69 +- .../test/reference/conversion.all.stderr | 1862 +---------------- .../test/reference/conversion.all.stdout | 74 +- .../test/reference/conversion.matrix.stderr | 42 +- .../test/reference/conversion.matrix.stdout | 16 +- .../test/reference/conversion.profile.stderr | 98 +- .../test/reference/conversion.profile.stdout | 19 +- .../test/reference/conversion.simple.stderr | 42 +- .../test/reference/conversion.simple.stdout | 19 +- .../distributed_solver.matrix.stdout | 3 +- 
.../distributed_solver.profile.stderr | 8 +- .../distributed_solver.profile.stdout | 6 +- .../distributed_solver.simple.stdout | 6 +- .../reference/matrix_statistics.matrix.stderr | 2 +- .../reference/matrix_statistics.matrix.stdout | 4 +- .../reference/matrix_statistics.simple.stderr | 2 +- .../reference/matrix_statistics.simple.stdout | 7 +- .../reference/preconditioner.matrix.stderr | 33 +- .../reference/preconditioner.matrix.stdout | 4 +- .../reference/preconditioner.profile.stderr | 29 +- .../reference/preconditioner.profile.stdout | 7 +- .../reference/preconditioner.simple.stderr | 33 +- .../reference/preconditioner.simple.stdout | 7 +- benchmark/test/reference/solver.matrix.stdout | 3 +- .../test/reference/solver.profile.stderr | 8 +- .../test/reference/solver.profile.stdout | 6 +- benchmark/test/reference/solver.simple.stdout | 6 +- .../test/reference/sparse_blas.matrix.stderr | 29 +- .../test/reference/sparse_blas.profile.stderr | 23 +- .../test/reference/sparse_blas.simple.stderr | 30 +- benchmark/test/reference/spmv.matrix.stderr | 21 +- benchmark/test/reference/spmv.matrix.stdout | 5 +- benchmark/test/reference/spmv.profile.stderr | 32 +- benchmark/test/reference/spmv.profile.stdout | 6 +- benchmark/test/reference/spmv.simple.stderr | 21 +- benchmark/test/reference/spmv.simple.stdout | 6 +- benchmark/utils/general.hpp | 342 +-- benchmark/utils/general_matrix.hpp | 18 +- benchmark/utils/generator.hpp | 118 +- benchmark/utils/iteration_control.hpp | 326 +++ benchmark/utils/json.hpp | 63 +- benchmark/utils/loggers.hpp | 100 +- benchmark/utils/runner.hpp | 209 ++ benchmark/utils/spmv_validation.hpp | 83 - third_party/CMakeLists.txt | 4 +- third_party/nlohmann_json/CMakeLists.txt | 9 + third_party/rapidjson/CMakeLists.txt | 14 - 68 files changed, 1864 insertions(+), 4319 deletions(-) rename benchmark/{conversions => conversion}/CMakeLists.txt (88%) create mode 100644 benchmark/conversion/conversion.cpp delete mode 100644 benchmark/conversions/conversions.cpp create mode 100644 benchmark/utils/iteration_control.hpp create mode 100644 benchmark/utils/runner.hpp delete mode 100644 benchmark/utils/spmv_validation.hpp create mode 100644 third_party/nlohmann_json/CMakeLists.txt delete mode 100644 third_party/rapidjson/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 26bc992c457..fab64e43c76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -256,7 +256,7 @@ if(GINKGO_BUILD_TESTS) endif() if(GINKGO_BUILD_BENCHMARKS) find_package(gflags 2.2.2 QUIET) - find_package(RapidJSON 1.1.0 QUIET) + find_package(nlohmann_json 3.9.1 QUIET) endif() # System provided, third party libraries (not bundled!) 
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 44a0a3d1d9e..e993ee6cf0c 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -57,7 +57,7 @@ endfunction() # All remaining arguments will be treated as source files function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def type) add_executable("${name}" ${ARGN}) - target_link_libraries("${name}" ginkgo gflags rapidjson) + target_link_libraries("${name}" ginkgo gflags nlohmann_json::nlohmann_json) # always include the device timer if (GINKGO_BUILD_CUDA) target_compile_definitions("${name}" PRIVATE HAS_CUDA_TIMER=1) @@ -149,7 +149,7 @@ if (GINKGO_BUILD_MPI) endif() add_subdirectory(blas) -add_subdirectory(conversions) +add_subdirectory(conversion) add_subdirectory(matrix_generator) add_subdirectory(matrix_statistics) add_subdirectory(preconditioner) diff --git a/benchmark/blas/blas.cpp b/benchmark/blas/blas.cpp index 11228ed5818..f7ad8120a80 100644 --- a/benchmark/blas/blas.cpp +++ b/benchmark/blas/blas.cpp @@ -130,26 +130,17 @@ Parameters for a benchmark case are: stride_B: stride for B matrix in gemm (optional, default m) stride_C: stride for C matrix in gemm (optional, default m) )"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing(&argc, &argv, header, format); - std::string extra_information = - "The operations are " + FLAGS_operations + "\n"; + std::string extra_information = "The operations are " + FLAGS_operations; print_general_information(extra_information); auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - std::cerr - << "Input has to be a JSON array of benchmark configurations:\n" - << format; - std::exit(1); - } + auto test_cases = json::parse(get_input_stream()); - run_blas_benchmarks(exec, get_timer(exec, FLAGS_gpu_timer), operation_map, - test_cases, true); + run_test_cases(BlasBenchmark{operation_map}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/blas/blas_common.hpp b/benchmark/blas/blas_common.hpp index fe0110f82fb..88819a043b0 100644 --- a/benchmark/blas/blas_common.hpp +++ b/benchmark/blas/blas_common.hpp @@ -43,7 +43,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
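Since the benchmarks now parse their test cases with nlohmann_json instead of RapidJSON (see the CMake and blas.cpp changes above), a small self-contained sketch of that parse/pretty-print round trip may be useful; the "blas"/"axpy" result entry is a hypothetical placeholder, not taken from the diff:

    #include <iomanip>
    #include <iostream>
    #include <sstream>

    #include <nlohmann/json.hpp>

    using json = nlohmann::json;

    int main()
    {
        // same shape as the blas example config shown above
        std::istringstream input(R"([{"n": 100}, {"n": 200, "m": 200, "k": 200}])");
        // json::parse accepts any std::istream, e.g. get_input_stream() or std::cin
        auto test_cases = json::parse(input);
        // nested result objects are created on first access via operator[]
        test_cases[0]["blas"]["axpy"]["completed"] = true;
        // std::setw sets the indentation width used when streaming the document
        std::cout << std::setw(4) << test_cases << std::endl;
    }
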
#include "benchmark/utils/general.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" #include "core/components/prefix_sum_kernels.hpp" @@ -70,14 +72,6 @@ DEFINE_string( "C has dimensions n x m and x and y have dimensions n x r"); -std::string example_config = R"( - [ - { "n": 100 }, - { "n": 200, "m": 200, "k": 200 } - ] -)"; - - class BenchmarkOperation { public: virtual ~BenchmarkOperation() = default; @@ -404,70 +398,101 @@ struct dimensions { }; -dimensions parse_dims(rapidjson::Value& test_case) -{ - auto get_optional = [](rapidjson::Value& obj, const char* name, - gko::size_type default_value) -> gko::size_type { - if (obj.HasMember(name)) { - return obj[name].GetUint64(); - } else { - return default_value; - } - }; - - dimensions result; - result.n = test_case["n"].GetInt64(); - result.k = get_optional(test_case, "k", result.n); - result.m = get_optional(test_case, "m", result.n); - result.r = get_optional(test_case, "r", 1); - if (test_case.HasMember("stride")) { - result.stride_x = test_case["stride"].GetInt64(); - result.stride_y = result.stride_x; - } else { - result.stride_x = get_optional(test_case, "stride_x", result.r); - result.stride_y = get_optional(test_case, "stride_y", result.r); +struct BlasBenchmark : Benchmark { + using map_type = + std::map( + std::shared_ptr, dimensions)>>; + map_type operation_map; + std::vector operations; + std::string name; + bool do_print; + + BlasBenchmark(map_type operation_map, bool do_print = true) + : operation_map{std::move(operation_map)}, + name{"blas"}, + operations{split(FLAGS_operations)}, + do_print{do_print} + {} + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return operations; } - result.stride_A = get_optional(test_case, "stride_A", result.k); - result.stride_B = get_optional(test_case, "stride_B", result.m); - result.stride_C = get_optional(test_case, "stride_C", result.m); - return result; -} + bool should_print() const override { return do_print; } -std::string describe(rapidjson::Value& test_case) -{ - std::stringstream ss; - auto optional_output = [&](const char* name) { - if (test_case.HasMember(name) && test_case[name].IsInt64()) { - ss << name << " = " << test_case[name].GetInt64() << " "; - } - }; - optional_output("n"); - optional_output("k"); - optional_output("m"); - optional_output("r"); - optional_output("stride"); - optional_output("stride_x"); - optional_output("stride_y"); - optional_output("stride_A"); - optional_output("stride_B"); - optional_output("stride_C"); - return ss.str(); -} + std::string get_example_config() const override + { + return json::parse(R"([{"n": 100}, {"n": 200, "m": 200, "k": 200}])") + .dump(4); + } + bool validate_config(const json& value) const override + { + return value.contains("n") && value["n"].is_number_integer(); + } -template -void apply_blas(const char* operation_name, std::shared_ptr exec, - std::shared_ptr timer, const OpMap& operation_map, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& blas_case = test_case["blas"]; - add_or_set_member(blas_case, operation_name, - rapidjson::Value(rapidjson::kObjectType), allocator); + std::string describe_config(const json& test_case) const override + { + std::stringstream ss; + auto optional_output = [&](const char* name) { + if (test_case.contains(name) 
&& + test_case[name].is_number_integer()) { + ss << name << " = " << test_case[name].get() << " "; + } + }; + optional_output("n"); + optional_output("k"); + optional_output("m"); + optional_output("r"); + optional_output("stride"); + optional_output("stride_x"); + optional_output("stride_y"); + optional_output("stride_A"); + optional_output("stride_B"); + optional_output("stride_C"); + return ss.str(); + } + + dimensions setup(std::shared_ptr exec, + json& test_case) const override + { + auto get_optional = [](json& obj, const char* name, + gko::size_type default_value) -> gko::size_type { + if (obj.contains(name)) { + return obj[name].get(); + } else { + return default_value; + } + }; + + dimensions result; + result.n = test_case["n"].get(); + result.k = get_optional(test_case, "k", result.n); + result.m = get_optional(test_case, "m", result.n); + result.r = get_optional(test_case, "r", 1); + if (test_case.contains("stride")) { + result.stride_x = test_case["stride"].get(); + result.stride_y = result.stride_x; + } else { + result.stride_x = get_optional(test_case, "stride_x", result.r); + result.stride_y = get_optional(test_case, "stride_y", result.r); + } + result.stride_A = get_optional(test_case, "stride_A", result.k); + result.stride_B = get_optional(test_case, "stride_B", result.m); + result.stride_C = get_optional(test_case, "stride_C", result.m); + return result; + } - auto op = operation_map.at(operation_name)(exec, parse_dims(test_case)); + + void run(std::shared_ptr exec, std::shared_ptr timer, + dimensions& dims, const std::string& operation_name, + json& operation_case) const override + { + auto op = operation_map.at(operation_name)(exec, dims); IterationControl ic(timer); @@ -488,89 +513,9 @@ void apply_blas(const char* operation_name, std::shared_ptr exec, const auto flops = static_cast(op->get_flops()); const auto mem = static_cast(op->get_memory()); const auto repetitions = ic.get_num_repetitions(); - add_or_set_member(blas_case[operation_name], "time", runtime, - allocator); - add_or_set_member(blas_case[operation_name], "flops", flops / runtime, - allocator); - add_or_set_member(blas_case[operation_name], "bandwidth", mem / runtime, - allocator); - add_or_set_member(blas_case[operation_name], "repetitions", repetitions, - allocator); - - // compute and write benchmark data - add_or_set_member(blas_case[operation_name], "completed", true, - allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["blas"][operation_name], "completed", false, - allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["blas"][operation_name], "error", - msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; - } -} - - -template -void run_blas_benchmarks(std::shared_ptr exec, - std::shared_ptr timer, - const OpMap& operation_map, - rapidjson::Document& test_cases, bool do_print) -{ - auto operations = split(FLAGS_operations, ','); - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); + operation_case["time"] = runtime; + operation_case["flops"] = flops / runtime; + operation_case["bandwidth"] = mem / runtime; + operation_case["repetitions"] = repetitions; } - auto annotate = annotate_functor{profiler_hook}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - if 
(!test_case.HasMember("blas")) { - test_case.AddMember("blas", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& blas_case = test_case["blas"]; - if (!FLAGS_overwrite && - all_of(begin(operations), end(operations), - [&blas_case](const std::string& s) { - return blas_case.HasMember(s.c_str()); - })) { - continue; - } - if (do_print) { - std::clog << "Running test case\n" << test_case << std::endl; - } - // annotate the test case - auto test_case_range = annotate(describe(test_case)); - for (const auto& operation_name : operations) { - { - auto operation_range = annotate(operation_name.c_str()); - apply_blas(operation_name.c_str(), exec, timer, - operation_map, test_case, allocator); - } - - if (do_print) { - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - - backup_results(test_cases); - } - } - } catch (const std::exception& e) { - std::cerr << "Error setting up benchmark, what(): " << e.what() - << std::endl; - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } -} +}; diff --git a/benchmark/blas/distributed/multi_vector.cpp b/benchmark/blas/distributed/multi_vector.cpp index be326b08b96..d95e5fb38ac 100644 --- a/benchmark/blas/distributed/multi_vector.cpp +++ b/benchmark/blas/distributed/multi_vector.cpp @@ -50,6 +50,10 @@ int main(int argc, char* argv[]) { gko::experimental::mpi::environment mpi_env{argc, argv}; + const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); + const auto rank = comm.rank(); + const auto do_print = rank == 0; + std::string header = R"(" A benchmark for measuring performance of Ginkgo's BLAS-like " operations. @@ -60,13 +64,10 @@ Parameters for a benchmark case are: stride_x: stride for input vector x (optional, default r) stride_y: stride for in/out vector y (optional, default r) )"; - std::string format = example_config; - initialize_argument_parsing(&argc, &argv, header, format); + std::string format = Generator::get_example_config(); + initialize_argument_parsing(&argc, &argv, header, format, do_print); - const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); - const auto rank = comm.rank(); - - if (rank == 0) { + if (do_print) { std::string extra_information = "The operations are " + FLAGS_operations; print_general_information(extra_information); @@ -75,14 +76,7 @@ Parameters for a benchmark case are: auto exec = executor_factory_mpi.at(FLAGS_executor)(comm.get()); std::string json_input = broadcast_json_input(get_input_stream(), comm); - rapidjson::Document test_cases; - test_cases.Parse(json_input.c_str()); - if (!test_cases.IsArray()) { - std::cerr - << "Input has to be a JSON array of benchmark configurations:\n" - << format; - std::exit(1); - } + auto test_cases = json::parse(json_input); std::map( @@ -130,10 +124,10 @@ Parameters for a benchmark case are: exec, Generator{comm, {}}, dims.n, dims.r, dims.stride_y); }}}; - run_blas_benchmarks(exec, get_mpi_timer(exec, comm, FLAGS_gpu_timer), - operation_map, test_cases, rank == 0); + run_test_cases(BlasBenchmark{operation_map, do_print}, exec, + get_mpi_timer(exec, comm, FLAGS_gpu_timer), test_cases); - if (rank == 0) { - std::cout << test_cases << std::endl; + if (do_print) { + std::cout << std::setw(4) << test_cases << std::endl; } } diff --git a/benchmark/conversions/CMakeLists.txt b/benchmark/conversion/CMakeLists.txt similarity index 88% rename from benchmark/conversions/CMakeLists.txt rename to benchmark/conversion/CMakeLists.txt index 21dd363d3c0..7ecf578c055 100644 --- 
a/benchmark/conversions/CMakeLists.txt +++ b/benchmark/conversion/CMakeLists.txt @@ -1 +1 @@ -ginkgo_add_typed_benchmark_executables(conversion "NO" conversions.cpp) +ginkgo_add_typed_benchmark_executables(conversion "NO" conversion.cpp) diff --git a/benchmark/conversion/conversion.cpp b/benchmark/conversion/conversion.cpp new file mode 100644 index 00000000000..b9a5d5c46d6 --- /dev/null +++ b/benchmark/conversion/conversion.cpp @@ -0,0 +1,194 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "benchmark/utils/formats.hpp" +#include "benchmark/utils/general_matrix.hpp" +#include "benchmark/utils/generator.hpp" +#include "benchmark/utils/iteration_control.hpp" +#include "benchmark/utils/runner.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" + + +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING + + +using Generator = DefaultSystemGenerator<>; + + +struct ConversionBenchmark : Benchmark> { + std::string name; + std::vector operations; + + ConversionBenchmark() : name{"conversion"} + { + auto ref_exec = gko::ReferenceExecutor::create(); + auto formats = split(FLAGS_formats); + for (const auto& from_format : formats) { + operations.push_back(from_format + "-read"); + auto from_mtx = + formats::matrix_type_factory.at(from_format)(ref_exec); + // all pairs of conversions that are supported by Ginkgo + for (const auto& to_format : formats) { + if (from_format == to_format) { + continue; + } + auto to_mtx = + formats::matrix_type_factory.at(to_format)(ref_exec); + try { + to_mtx->copy_from(from_mtx); + operations.push_back(from_format + "-" + to_format); + } catch (const std::exception& e) { + } + } + } + } + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return operations; + } + + bool should_print() const override { return true; } + + std::string get_example_config() const override + { + return Generator::get_example_config(); + } + + bool validate_config(const json& test_case) const override + { + return Generator::validate_config(test_case); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } + + gko::matrix_data setup(std::shared_ptr exec, + json& test_case) const override + { + gko::matrix_data data; + data = Generator::generate_matrix_data(test_case); + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + return data; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + gko::matrix_data& data, + const std::string& operation_name, + json& operation_case) const override + { + auto split_it = + std::find(operation_name.begin(), operation_name.end(), '-'); + std::string from_name{operation_name.begin(), split_it}; + std::string to_name{split_it + 1, operation_name.end()}; + auto mtx_from = formats::matrix_type_factory.at(from_name)(exec); + auto readable = + gko::as>(mtx_from.get()); + IterationControl ic{timer}; + if (to_name == "read") { + // warm run + for (auto _ : ic.warmup_run()) { + exec->synchronize(); + readable->read(data); + exec->synchronize(); + } + // timed run + for (auto _ : ic.run()) { + readable->read(data); + } + } else { + readable->read(data); + auto mtx_to = formats::matrix_type_factory.at(to_name)(exec); + + // warm run + for (auto _ : ic.warmup_run()) { + exec->synchronize(); + mtx_to->copy_from(mtx_from); + exec->synchronize(); + } + // timed run + for (auto _ : ic.run()) { + mtx_to->copy_from(mtx_from); + } + } + operation_case["time"] = ic.compute_time(FLAGS_timer_method); + operation_case["repetitions"] = 
ic.get_num_repetitions(); + } +}; + + +int main(int argc, char* argv[]) +{ + std::string header = + "A benchmark for measuring performance of Ginkgo's conversions.\n"; + std::string format_str = Generator::get_example_config(); + initialize_argument_parsing_matrix(&argc, &argv, header, format_str); + + std::string extra_information = + std::string() + "The formats are " + FLAGS_formats; + print_general_information(extra_information); + + auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); + auto formats = split(FLAGS_formats, ','); + + auto test_cases = json::parse(get_input_stream()); + + run_test_cases(ConversionBenchmark{}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); + + std::cout << std::setw(4) << test_cases << std::endl; +} diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp deleted file mode 100644 index d9684321e2d..00000000000 --- a/benchmark/conversions/conversions.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*************************************************************/ - -#include - - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "benchmark/utils/formats.hpp" -#include "benchmark/utils/general_matrix.hpp" -#include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" -#include "benchmark/utils/timer.hpp" -#include "benchmark/utils/types.hpp" - - -#ifdef GINKGO_BENCHMARK_ENABLE_TUNING -#include "benchmark/utils/tuning_variables.hpp" -#endif // GINKGO_BENCHMARK_ENABLE_TUNING - - -// This function supposes that management of `FLAGS_overwrite` is done before -// calling it -void convert_matrix(const gko::LinOp* matrix_from, const char* format_to, - const char* conversion_name, - std::shared_ptr exec, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& conversion_case = test_case["conversions"]; - add_or_set_member(conversion_case, conversion_name, - rapidjson::Value(rapidjson::kObjectType), allocator); - - gko::matrix_data data{gko::dim<2>{1, 1}, 1}; - auto matrix_to = share(formats::matrix_factory(format_to, exec, data)); - - auto timer = get_timer(exec, FLAGS_gpu_timer); - IterationControl ic{timer}; - - // warm run - for (auto _ : ic.warmup_run()) { - exec->synchronize(); - matrix_to->copy_from(matrix_from); - exec->synchronize(); - matrix_to->clear(); - } - // timed run - for (auto _ : ic.run()) { - matrix_to->copy_from(matrix_from); - } - add_or_set_member(conversion_case[conversion_name], "time", - ic.compute_time(FLAGS_timer_method), allocator); - add_or_set_member(conversion_case[conversion_name], "repetitions", - ic.get_num_repetitions(), allocator); - - // compute and write benchmark data - add_or_set_member(conversion_case[conversion_name], "completed", true, - allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["conversions"][conversion_name], - "completed", false, allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["conversions"][conversion_name], - "error", msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; - } -} - - -int main(int argc, char* argv[]) -{ - std::string header = - "A benchmark for measuring performance of Ginkgo's conversions.\n"; - std::string format_str = example_config; - initialize_argument_parsing_matrix(&argc, &argv, header, format_str); - - std::string extra_information = - std::string() + "The formats are " + FLAGS_formats + "\n"; - print_general_information(extra_information); - - auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - auto formats = split(FLAGS_formats, ','); - - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } - - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = annotate_functor{profiler_hook}; - - DefaultSystemGenerator<> generator{}; - - for (auto& test_case : test_cases.GetArray()) { - std::clog << "Benchmarking conversions. 
" << std::endl; - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("conversions")) { - test_case.AddMember("conversions", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& conversion_case = test_case["conversions"]; - - std::clog << "Running test case\n" << test_case << std::endl; - gko::matrix_data data; - try { - data = generator.generate_matrix_data(test_case); - } catch (std::exception& e) { - std::cerr << "Error setting up matrix data, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); - } - continue; - } - std::clog << "Matrix is of size (" << data.size[0] << ", " - << data.size[1] << ")" << std::endl; - add_or_set_member(test_case, "size", data.size[0], allocator); - // annotate the test case - auto test_case_range = annotate(generator.describe_config(test_case)); - for (const auto& format_from : formats) { - try { - auto matrix_from = - share(formats::matrix_factory(format_from, exec, data)); - for (const auto& format_to : formats) { - if (format_from == format_to) { - continue; - } - auto conversion_name = - std::string(format_from) + "-" + format_to; - - if (!FLAGS_overwrite && - conversion_case.HasMember(conversion_name.c_str())) { - continue; - } - { - auto conversion_range = - annotate(conversion_name.c_str()); - convert_matrix(matrix_from.get(), format_to.c_str(), - conversion_name.c_str(), exec, test_case, - allocator); - } - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - } - backup_results(test_cases); - } catch (const gko::AllocationError& e) { - for (const auto& format : formats::matrix_type_factory) { - const auto format_to = std::get<0>(format); - auto conversion_name = - std::string(format_from) + "-" + format_to; - add_or_set_member( - test_case["conversions"][conversion_name.c_str()], - "completed", false, allocator); - } - std::cerr << "Error when allocating data for type " - << format_from << ". 
what(): " << e.what() - << std::endl; - backup_results(test_cases); - } catch (const std::exception& e) { - std::cerr << "Error when running benchmark, what(): " - << e.what() << std::endl; - } - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } - - std::cout << test_cases << std::endl; -} diff --git a/benchmark/matrix_generator/matrix_generator.cpp b/benchmark/matrix_generator/matrix_generator.cpp index 138b5a9c2ce..193d95f897f 100644 --- a/benchmark/matrix_generator/matrix_generator.cpp +++ b/benchmark/matrix_generator/matrix_generator.cpp @@ -85,31 +85,33 @@ std::string input_format = // clang-format on -void validate_option_object(const rapidjson::Value& value) +void validate_option_object(const json& value) { - if (!value.IsObject() || !value.HasMember("filename") || - !value["filename"].IsString() || !value.HasMember("problem") || - !value["problem"].IsObject() || !value["problem"].HasMember("type") || - !value["problem"]["type"].IsString()) { + if (!value.is_object() || !value.contains("filename") || + !value["filename"].is_string() || !value.contains("problem") || + !value["problem"].is_object() || !value["problem"].contains("type") || + !value["problem"]["type"].is_string()) { print_config_error_and_exit(2); } } using generator_function = std::function( - rapidjson::Value&, std::default_random_engine&)>; + json&, std::default_random_engine&)>; // matrix generators gko::matrix_data generate_block_diagonal( - rapidjson::Value& config, std::default_random_engine& engine) + json& config, std::default_random_engine& engine) { - if (!config.HasMember("num_blocks") || !config["num_blocks"].IsUint() || - !config.HasMember("block_size") || !config["block_size"].IsUint()) { + if (!config.contains("num_blocks") || + !config["num_blocks"].is_number_unsigned() || + !config.contains("block_size") || + !config["block_size"].is_number_unsigned()) { print_config_error_and_exit(2); } - auto num_blocks = config["num_blocks"].GetUint(); - auto block_size = config["block_size"].GetUint(); + auto num_blocks = config["num_blocks"].get(); + auto block_size = config["block_size"].get(); auto block = gko::matrix_data( gko::dim<2>(block_size), std::uniform_real_distribution(-1.0, 1.0), engine); @@ -132,20 +134,18 @@ int main(int argc, char* argv[]) std::clog << gko::version_info::get() << std::endl; auto engine = get_engine(); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document configurations; - configurations.ParseStream(jcin); + auto configurations = json::parse(get_input_stream()); - if (!configurations.IsArray()) { + if (!configurations.is_array()) { print_config_error_and_exit(1); } - for (auto& config : configurations.GetArray()) { + for (auto& config : configurations) { try { validate_option_object(config); std::clog << "Generating matrix: " << config << std::endl; - auto filename = config["filename"].GetString(); - auto type = config["problem"]["type"].GetString(); + auto filename = config["filename"].get(); + auto type = config["problem"]["type"].get(); auto mdata = generator[type](config["problem"], engine); std::ofstream ofs(filename); gko::write_raw(ofs, mdata, gko::layout_type::coordinate); diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index fccf4391ad5..40c505c7627 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -38,9 +38,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include + + #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/types.hpp" @@ -51,9 +54,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // See en.wikipedia.org/wiki/Five-number_summary // Quartile computation uses Method 3 from en.wikipedia.org/wiki/Quartile -void compute_summary(const std::vector& dist, - rapidjson::Value& out, - rapidjson::MemoryPoolAllocator<>& allocator) +void compute_summary(const std::vector& dist, json& out) { const auto q = dist.size() / 4; const auto r = dist.size() % 4; @@ -72,23 +73,14 @@ void compute_summary(const std::vector& dist, }; // clang-format on - add_or_set_member(out, "min", dist[0], allocator); - add_or_set_member( - out, "q1", - coefs[r][0] * static_cast(dist[positions[r][0]]) + - coefs[r][1] * static_cast(dist[positions[r][1]]), - allocator); - add_or_set_member( - out, "median", - coefs[r][2] * static_cast(dist[positions[r][2]]) + - coefs[r][3] * static_cast(dist[positions[r][3]]), - allocator); - add_or_set_member( - out, "q3", - coefs[r][4] * static_cast(dist[positions[r][4]]) + - coefs[r][5] * static_cast(dist[positions[r][5]]), - allocator); - add_or_set_member(out, "max", dist[dist.size() - 1], allocator); + out["min"] = dist.front(); + out["q1"] = coefs[r][0] * static_cast(dist[positions[r][0]]) + + coefs[r][1] * static_cast(dist[positions[r][1]]); + out["median"] = coefs[r][2] * static_cast(dist[positions[r][2]]) + + coefs[r][3] * static_cast(dist[positions[r][3]]); + out["q3"] = coefs[r][4] * static_cast(dist[positions[r][4]]) + + coefs[r][5] * static_cast(dist[positions[r][5]]); + out["max"] = dist.back(); } @@ -108,39 +100,30 @@ double compute_moment(int degree, const std::vector& dist, // See en.wikipedia.org/wiki/Moment_(mathematics) -void compute_moments(const std::vector& dist, - rapidjson::Value& out, - rapidjson::MemoryPoolAllocator<>& allocator) +void compute_moments(const std::vector& dist, json& out) { const auto mean = compute_moment(1, dist); - add_or_set_member(out, "mean", mean, allocator); + out["mean"] = mean; const auto variance = compute_moment(2, dist, mean); - add_or_set_member(out, "variance", variance, allocator); + out["variance"] = variance; const auto dev = std::sqrt(variance); - add_or_set_member(out, "skewness", compute_moment(3, dist, mean, dev), - allocator); - add_or_set_member(out, "kurtosis", compute_moment(4, dist, mean, dev), - allocator); - add_or_set_member(out, "hyperskewness", compute_moment(5, dist, mean, dev), - allocator); - add_or_set_member(out, "hyperflatness", compute_moment(6, dist, mean, dev), - allocator); + out["skewness"] = compute_moment(3, dist, mean, dev); + out["kurtosis"] = compute_moment(4, dist, mean, dev); + out["hyperskewness"] = compute_moment(5, dist, mean, dev); + out["hyperflatness"] = compute_moment(6, dist, mean, dev); } -template void compute_distribution_properties(const std::vector& dist, - rapidjson::Value& out, - Allocator& allocator) + json& out) { - compute_summary(dist, out, allocator); - compute_moments(dist, out, allocator); + compute_summary(dist, out); + compute_moments(dist, out); } -template void extract_matrix_statistics(gko::matrix_data& data, - rapidjson::Value& problem, Allocator& allocator) + json& problem) { std::vector row_dist(data.size[0]); std::vector col_dist(data.size[1]); @@ -149,72 +132,90 @@ void extract_matrix_statistics(gko::matrix_data& data, 
++col_dist[v.column]; } - add_or_set_member(problem, "rows", data.size[0], allocator); - add_or_set_member(problem, "columns", data.size[1], allocator); - add_or_set_member(problem, "nonzeros", data.nonzeros.size(), allocator); + problem["rows"] = data.size[0]; + problem["columns"] = data.size[1]; + problem["nonzeros"] = data.nonzeros.size(); std::sort(begin(row_dist), end(row_dist)); - add_or_set_member(problem, "row_distribution", - rapidjson::Value(rapidjson::kObjectType), allocator); - compute_distribution_properties(row_dist, problem["row_distribution"], - allocator); + problem["row_distribution"] = json::object(); + compute_distribution_properties(row_dist, problem["row_distribution"]); std::sort(begin(col_dist), end(col_dist)); - add_or_set_member(problem, "col_distribution", - rapidjson::Value(rapidjson::kObjectType), allocator); - compute_distribution_properties(col_dist, problem["col_distribution"], - allocator); + problem["col_distribution"] = json::object(); + compute_distribution_properties(col_dist, problem["col_distribution"]); } -int main(int argc, char* argv[]) -{ - std::string header = - "A utility that collects additional statistical properties of the " - "matrix.\n"; - std::string format = example_config; - initialize_argument_parsing_matrix(&argc, &argv, header, format); +using Generator = DefaultSystemGenerator; - std::clog << gko::version_info::get() << std::endl; - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } +struct MatrixStatistics : Benchmark { + std::string name; + std::vector empty; - auto& allocator = test_cases.GetAllocator(); + MatrixStatistics() : name{"problem"} {} - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("problem")) { - test_case.AddMember("problem", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& problem = test_case["problem"]; + const std::string& get_name() const override { return name; } - std::clog << "Running test case\n" << test_case << std::endl; + const std::vector& get_operations() const override + { + return empty; + } - auto matrix = - DefaultSystemGenerator::generate_matrix_data( - test_case); + bool should_print() const override { return true; } - std::clog << "Matrix is of size (" << matrix.size[0] << ", " - << matrix.size[1] << ")" << std::endl; - add_or_set_member(test_case, "size", matrix.size[0], allocator); + std::string get_example_config() const override + { + return Generator::get_example_config(); + } - extract_matrix_statistics(matrix, test_case["problem"], allocator); + bool validate_config(const json& test_case) const override + { + return Generator::validate_config(test_case); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } - backup_results(test_cases); - } catch (const std::exception& e) { - std::cerr << "Error extracting statistics, what(): " << e.what() - << std::endl; - } + int setup(std::shared_ptr exec, + json& test_case) const override + { + auto data = Generator::generate_matrix_data(test_case); + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + + extract_matrix_statistics(data, 
test_case["problem"]); + return 0; } - std::cout << test_cases << std::endl; + + void run(std::shared_ptr exec, std::shared_ptr timer, + int& data, const std::string& operation_name, + json& operation_case) const override + {} +}; + + +int main(int argc, char* argv[]) +{ + std::string header = + "A utility that collects additional statistical properties of the " + "matrix.\n"; + std::string format = Generator::get_example_config(); + initialize_argument_parsing_matrix(&argc, &argv, header, format); + + std::clog << gko::version_info::get() << std::endl; + + auto test_cases = json::parse(get_input_stream()); + auto exec = gko::ReferenceExecutor::create(); + + run_test_cases(MatrixStatistics{}, exec, get_timer(exec, false), + test_cases); + + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index e7859e992dc..7c130328d34 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -43,9 +43,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/preconditioners.hpp" -#include "benchmark/utils/spmv_validation.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" @@ -128,34 +129,85 @@ std::string encode_parameters(const char* precond_name) } -void run_preconditioner(const char* precond_name, - std::shared_ptr exec, - std::shared_ptr system_matrix, - const vec* b, const vec* x, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& precond_object = test_case["preconditioner"]; - auto encoded_name = encode_parameters(precond_name); +struct preconditioner_benchmark_state { + std::unique_ptr x; + std::unique_ptr b; + std::shared_ptr system_matrix; +}; + + +using Generator = DefaultSystemGenerator<>; + - if (!FLAGS_overwrite && - precond_object.HasMember(encoded_name.c_str())) { - return; +struct PreconditionerBenchmark : Benchmark { + std::string name; + std::vector preconditioners; + std::map precond_decoder; + + PreconditionerBenchmark() + : name{"preconditioner"}, preconditioners{split(FLAGS_preconditioners)} + { + for (auto precond : split(FLAGS_preconditioners)) { + preconditioners.push_back(encode_parameters(precond.c_str())); + precond_decoder[preconditioners.back()] = precond; } + } + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return preconditioners; + } + + bool should_print() const override { return true; } + + bool validate_config(const json& value) const override + { + return Generator::validate_config(value); + } - add_or_set_member(precond_object, encoded_name.c_str(), - rapidjson::Value(rapidjson::kObjectType), allocator); - auto& this_precond_data = precond_object[encoded_name.c_str()]; + std::string get_example_config() const override + { + return Generator::get_example_config(); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } - add_or_set_member(this_precond_data, "generate", - rapidjson::Value(rapidjson::kObjectType), allocator); - add_or_set_member(this_precond_data, "apply", - 
rapidjson::Value(rapidjson::kObjectType), allocator); + preconditioner_benchmark_state setup(std::shared_ptr exec, + json& test_case) const override + { + preconditioner_benchmark_state state; + auto data = Generator::generate_matrix_data(test_case); + + state.system_matrix = + formats::matrix_factory(FLAGS_formats, exec, data); + state.b = Generator::create_multi_vector_random(exec, data.size[0]); + state.x = Generator::create_multi_vector(exec, data.size[0], + gko::zero()); + + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + return state; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + preconditioner_benchmark_state& state, + const std::string& encoded_precond_name, + json& precond_case) const override + { + auto decoded_precond_name = precond_decoder.at(encoded_precond_name); + precond_case["generate"] = json::object(); + precond_case["apply"] = json::object(); for (auto stage : {"generate", "apply"}) { - add_or_set_member(this_precond_data[stage], "components", - rapidjson::Value(rapidjson::kObjectType), - allocator); + precond_case[stage]["components"] = json::object(); } IterationControl ic_gen{get_timer(exec, FLAGS_gpu_timer)}; @@ -163,54 +215,51 @@ void run_preconditioner(const char* precond_name, { // fast run, gets total time - auto x_clone = clone(x); - - auto precond = precond_factory.at(precond_name)(exec); + auto x_clone = clone(state.x); + auto precond = precond_factory.at(decoded_precond_name)(exec); for (auto _ : ic_apply.warmup_run()) { - precond->generate(system_matrix)->apply(b, x_clone); + precond->generate(state.system_matrix)->apply(state.b, x_clone); } std::unique_ptr precond_op; for (auto _ : ic_gen.run()) { - precond_op = precond->generate(system_matrix); + precond_op = precond->generate(state.system_matrix); } - add_or_set_member(this_precond_data["generate"], "time", - ic_gen.compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(this_precond_data["generate"], "repetitions", - ic_gen.get_num_repetitions(), allocator); + precond_case["generate"]["time"] = + ic_gen.compute_time(FLAGS_timer_method); + precond_case["generate"]["repetitions"] = + ic_gen.get_num_repetitions(); for (auto _ : ic_apply.run()) { - precond_op->apply(b, x_clone); + precond_op->apply(state.b, x_clone); } - add_or_set_member(this_precond_data["apply"], "time", - ic_apply.compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(this_precond_data["apply"], "repetitions", - ic_apply.get_num_repetitions(), allocator); + precond_case["apply"]["time"] = + ic_apply.compute_time(FLAGS_timer_method); + precond_case["apply"]["repetitions"] = + ic_apply.get_num_repetitions(); } if (FLAGS_detailed) { // slow run, times each component separately - auto x_clone = clone(x); - auto precond = precond_factory.at(precond_name)(exec); + auto x_clone = clone(state.x); + auto precond = precond_factory.at(decoded_precond_name)(exec); std::unique_ptr precond_op; { auto gen_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - this_precond_data["generate"]["components"], allocator, + precond_case["generate"]["components"], ic_gen.get_num_repetitions()); exec->add_logger(gen_logger); if (exec->get_master() != exec) { exec->get_master()->add_logger(gen_logger); } for (auto i = 0u; i < ic_gen.get_num_repetitions(); ++i) { - precond_op = 
precond->generate(system_matrix); + precond_op = precond->generate(state.system_matrix); } if (exec->get_master() != exec) { exec->get_master()->remove_logger(gen_logger); @@ -220,39 +269,22 @@ void run_preconditioner(const char* precond_name, auto apply_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - this_precond_data["apply"]["components"], allocator, + precond_case["apply"]["components"], ic_apply.get_num_repetitions()); exec->add_logger(apply_logger); if (exec->get_master() != exec) { exec->get_master()->add_logger(apply_logger); } for (auto i = 0u; i < ic_apply.get_num_repetitions(); ++i) { - precond_op->apply(b, x_clone); + precond_op->apply(state.b, x_clone); } if (exec->get_master() != exec) { exec->get_master()->remove_logger(apply_logger); } exec->remove_logger(apply_logger); } - - add_or_set_member(this_precond_data, "completed", true, allocator); - } catch (const std::exception& e) { - auto encoded_name = encode_parameters(precond_name); - add_or_set_member(test_case["preconditioner"], encoded_name.c_str(), - rapidjson::Value(rapidjson::kObjectType), allocator); - add_or_set_member(test_case["preconditioner"][encoded_name.c_str()], - "completed", false, allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["preconditioner"][encoded_name.c_str()], - "error", msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; } -} +}; int main(int argc, char* argv[]) @@ -261,11 +293,11 @@ int main(int argc, char* argv[]) FLAGS_formats = "csr"; std::string header = "A benchmark for measuring preconditioner performance.\n"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing_matrix(&argc, &argv, header, format); std::string extra_information = - "Running with preconditioners: " + FLAGS_preconditioners + "\n"; + "Running with preconditioners: " + FLAGS_preconditioners; print_general_information(extra_information); auto exec = get_executor(FLAGS_gpu_timer); @@ -279,76 +311,10 @@ int main(int argc, char* argv[]) std::exit(1); } - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(get_input_stream()); - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = annotate_functor{profiler_hook}; - DefaultSystemGenerator<> generator{}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("preconditioner")) { - test_case.AddMember("preconditioner", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& precond_object = test_case["preconditioner"]; - if (!FLAGS_overwrite && - all_of(begin(preconditioners), end(preconditioners), - [&precond_object](const std::string& s) { - return precond_object.HasMember(s.c_str()); - })) { - continue; - } - std::clog << "Running test case\n" << test_case << std::endl; - - // annotate the test case - auto test_case_range = - annotate(generator.describe_config(test_case)); - - auto data = generator.generate_matrix_data(test_case); - - auto system_matrix = - share(formats::matrix_factory(FLAGS_formats, exec, 
data)); - auto b = generator.create_multi_vector_random( - exec, system_matrix->get_size()[0]); - auto x = generator.create_multi_vector( - exec, system_matrix->get_size()[0], gko::zero()); - - std::clog << "Matrix is of size (" << system_matrix->get_size()[0] - << ", " << system_matrix->get_size()[1] << ")" - << std::endl; - add_or_set_member(test_case, "size", data.size[0], allocator); - for (const auto& precond_name : preconditioners) { - { - auto precond_range = annotate(precond_name.c_str()); - run_preconditioner(precond_name.c_str(), exec, - system_matrix, b.get(), x.get(), - test_case, allocator); - } - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - backup_results(test_cases); - } - } catch (const std::exception& e) { - std::cerr << "Error setting up preconditioner, what(): " << e.what() - << std::endl; - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } + run_test_cases(PreconditionerBenchmark{}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/solver/distributed/solver.cpp b/benchmark/solver/distributed/solver.cpp index a9b1f9c1c93..d691309ab6a 100644 --- a/benchmark/solver/distributed/solver.cpp +++ b/benchmark/solver/distributed/solver.cpp @@ -52,7 +52,7 @@ struct Generator : public DistributedDefaultSystemGenerator { std::unique_ptr generate_rhs(std::shared_ptr exec, const gko::LinOp* system_matrix, - rapidjson::Value& config) const + json& config) const { return Vec::create( exec, comm, gko::dim<2>{system_matrix->get_size()[0], FLAGS_nrhs}, @@ -82,9 +82,13 @@ int main(int argc, char* argv[]) FLAGS_repetitions = "1"; FLAGS_min_repetitions = 1; + const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); + const auto rank = comm.rank(); + const auto do_print = rank == 0; + std::string header = "A benchmark for measuring Ginkgo's distributed solvers\n"; - std::string format = example_config + R"( + std::string format = solver_example_config + R"( The matrix will either be read from an input file if the filename parameter is given, or generated as a stencil matrix. 
If the filename parameter is given, all processes will read the file and @@ -100,10 +104,7 @@ int main(int argc, char* argv[]) )"; std::string additional_json = R"(,"optimal":{"spmv":"csr-csr"})"; initialize_argument_parsing_matrix(&argc, &argv, header, format, - additional_json); - - const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); - const auto rank = comm.rank(); + additional_json, do_print); auto exec = executor_factory_mpi.at(FLAGS_executor)(comm.get()); @@ -114,8 +115,8 @@ int main(int argc, char* argv[]) "Running " + FLAGS_solvers + " with " + std::to_string(FLAGS_max_iters) + " iterations and residual goal of " + ss_rel_res_goal.str() + "\nThe number of right hand sides is " + - std::to_string(FLAGS_nrhs) + "\n"; - if (rank == 0) { + std::to_string(FLAGS_nrhs); + if (do_print) { print_general_information(extra_information); } @@ -136,17 +137,12 @@ int main(int argc, char* argv[]) "optimal": {"spmv": "csr-csr"}] )" : broadcast_json_input(get_input_stream(), comm); - rapidjson::Document test_cases; - test_cases.Parse(json_input.c_str()); - - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(json_input); - run_solver_benchmarks(exec, get_mpi_timer(exec, comm, FLAGS_gpu_timer), - test_cases, Generator(comm), rank == 0); + run_test_cases(SolverBenchmark{Generator{comm}}, exec, + get_mpi_timer(exec, comm, FLAGS_gpu_timer), test_cases); - if (rank == 0) { - std::cout << test_cases << std::endl; + if (do_print) { + std::cout << std::setw(4) << test_cases << std::endl; } } diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 4efc5558a8e..b656102e5df 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -58,7 +58,7 @@ int main(int argc, char* argv[]) FLAGS_min_repetitions = 1; std::string header = "A benchmark for measuring performance of Ginkgo's solvers.\n"; - std::string format = example_config + R"( + std::string format = solver_example_config + R"( "optimal":"spmv" can be one of the recognized spmv formats )"; std::string additional_json = R"(,"optimal":{"spmv":"csr"})"; @@ -72,29 +72,24 @@ int main(int argc, char* argv[]) "Running " + FLAGS_solvers + " with " + std::to_string(FLAGS_max_iters) + " iterations and residual goal of " + ss_rel_res_goal.str() + "\nThe number of right hand sides is " + - std::to_string(FLAGS_nrhs) + "\n"; + std::to_string(FLAGS_nrhs); print_general_information(extra_information); auto exec = get_executor(FLAGS_gpu_timer); - rapidjson::Document test_cases; + json test_cases; if (!FLAGS_overhead) { - rapidjson::IStreamWrapper jcin(get_input_stream()); - test_cases.ParseStream(jcin); + test_cases = json::parse(get_input_stream()); } else { // Fake test case to run once auto overhead_json = std::string() + " [{\"filename\": \"overhead.mtx\", \"optimal\": " "{ \"spmv\": \"csr\"}}]"; - test_cases.Parse(overhead_json.c_str()); + test_cases = json::parse(overhead_json); } - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } - - run_solver_benchmarks(exec, get_timer(exec, FLAGS_gpu_timer), test_cases, - SolverGenerator{}, true); + run_test_cases(SolverBenchmark{SolverGenerator{}}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index ae9ae6dc1fb..4976e5759d4 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ 
-37,8 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/generator.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/preconditioners.hpp" +#include "benchmark/utils/runner.hpp" #ifdef GINKGO_BENCHMARK_ENABLE_TUNING @@ -107,7 +109,7 @@ DEFINE_bool(overhead, false, "If set, uses dummy data to benchmark Ginkgo overhead"); -std::string example_config = R"( +std::string solver_example_config = R"( [ {"filename": "my_file.mtx", "optimal": {"spmv": "ell-csr"}, "rhs": "my_file_rhs.mtx"}, @@ -119,28 +121,6 @@ std::string example_config = R"( )"; -// input validation -[[noreturn]] void print_config_error_and_exit() -{ - std::cerr << "Input has to be a JSON array of solver configurations:\n" - << example_config << std::endl; - std::exit(1); -} - - -void validate_option_object(const rapidjson::Value& value) -{ - if (!value.IsObject() || - !((value.HasMember("size") && value.HasMember("stencil") && - value["size"].IsInt64() && value["stencil"].IsString()) || - (value.HasMember("filename") && value["filename"].IsString())) || - (!value.HasMember("optimal") && !value["optimal"].HasMember("spmv") && - !value["optimal"]["spmv"].IsString())) { - print_config_error_and_exit(); - } -} - - std::shared_ptr create_criterion( std::shared_ptr exec, std::uint32_t max_iters) { @@ -284,21 +264,17 @@ std::unique_ptr generate_solver( } -void write_precond_info(const gko::LinOp* precond, - rapidjson::Value& precond_info, - rapidjson::MemoryPoolAllocator<>& allocator) +void write_precond_info(const gko::LinOp* precond, json& precond_info) { if (const auto jacobi = dynamic_cast*>(precond)) { // extract block sizes const auto bdata = jacobi->get_parameters().block_pointers.get_const_data(); - add_or_set_member(precond_info, "block_sizes", - rapidjson::Value(rapidjson::kArrayType), allocator); + precond_info["block_sizes"] = json::array(); const auto nblocks = jacobi->get_num_blocks(); for (auto i = decltype(nblocks){0}; i < nblocks; ++i) { - precond_info["block_sizes"].PushBack(bdata[i + 1] - bdata[i], - allocator); + precond_info["block_sizes"].push_back(bdata[i + 1] - bdata[i]); } // extract block precisions @@ -306,24 +282,19 @@ void write_precond_info(const gko::LinOp* precond, jacobi->get_parameters() .storage_optimization.block_wise.get_const_data(); if (pdata) { - add_or_set_member(precond_info, "block_precisions", - rapidjson::Value(rapidjson::kArrayType), - allocator); + precond_info["block_precisions"] = json::array(); for (auto i = decltype(nblocks){0}; i < nblocks; ++i) { - precond_info["block_precisions"].PushBack( - static_cast(pdata[i]), allocator); + precond_info["block_precisions"].push_back( + static_cast(pdata[i])); } } // extract condition numbers const auto cdata = jacobi->get_conditioning(); if (cdata) { - add_or_set_member(precond_info, "block_conditioning", - rapidjson::Value(rapidjson::kArrayType), - allocator); + precond_info["block_conditioning"] = json::array(); for (auto i = decltype(nblocks){0}; i < nblocks; ++i) { - precond_info["block_conditioning"].PushBack(cdata[i], - allocator); + precond_info["block_conditioning"].push_back(cdata[i]); } } } @@ -335,10 +306,10 @@ struct SolverGenerator : DefaultSystemGenerator<> { std::unique_ptr generate_rhs(std::shared_ptr exec, const gko::LinOp* system_matrix, - rapidjson::Value& config) const + json& config) const { - if (config.HasMember("rhs")) { - 
std::ifstream rhs_fd{config["rhs"].GetString()}; + if (config.contains("rhs")) { + std::ifstream rhs_fd{config["rhs"].get()}; return gko::read(rhs_fd, std::move(exec)); } else { gko::dim<2> vec_size{system_matrix->get_size()[0], FLAGS_nrhs}; @@ -399,45 +370,112 @@ struct SolverGenerator : DefaultSystemGenerator<> { }; -template -void solve_system(const std::string& solver_name, - const std::string& precond_name, - const char* precond_solver_name, - std::shared_ptr exec, - std::shared_ptr timer, - std::shared_ptr system_matrix, - const VectorType* b, const VectorType* x, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& solver_case = test_case["solver"]; - if (!FLAGS_overwrite && solver_case.HasMember(precond_solver_name)) { - return; +template +struct solver_benchmark_state { + using Vec = typename Generator::Vec; + std::shared_ptr system_matrix; + std::unique_ptr b; + std::unique_ptr x; +}; + + +template +struct SolverBenchmark : Benchmark> { + std::string name; + std::vector precond_solvers; + std::map> decoder; + Generator generator; + + SolverBenchmark(Generator generator) : name{"solver"}, generator{generator} + { + auto solvers = split(FLAGS_solvers, ','); + auto preconds = split(FLAGS_preconditioners, ','); + for (const auto& s : solvers) { + for (const auto& p : preconds) { + precond_solvers.push_back(s + (p == "none" ? "" : "-" + p)); + decoder[precond_solvers.back()] = {s, p}; + } + } + } + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return precond_solvers; + } + + bool should_print() const override { return true; } + + std::string get_example_config() const override + { + return solver_example_config; + } + + bool validate_config(const json& value) const override + { + return ((value.contains("size") && value.contains("stencil") && + value["size"].is_number_integer() && + value["stencil"].is_string()) || + (value.contains("filename") && + value["filename"].is_string())) && + (value.contains("optimal") && + value["optimal"].contains("spmv") && + value["optimal"]["spmv"].is_string()); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } + + solver_benchmark_state setup(std::shared_ptr exec, + json& test_case) const override + { + solver_benchmark_state state; + + if (FLAGS_overhead) { + state.system_matrix = generator.initialize({1.0}, exec); + state.b = generator.initialize( + {std::numeric_limits::quiet_NaN()}, exec); + state.x = generator.initialize({0.0}, exec); + } else { + state.system_matrix = + generator.generate_matrix_with_optimal_format(exec, test_case); + state.b = generator.generate_rhs(exec, state.system_matrix.get(), + test_case); + state.x = generator.generate_initial_guess( + exec, state.system_matrix.get(), state.b.get()); } - add_or_set_member(solver_case, precond_solver_name, - rapidjson::Value(rapidjson::kObjectType), allocator); - auto& solver_json = solver_case[precond_solver_name]; - add_or_set_member(solver_json, "recurrent_residuals", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(solver_json, "true_residuals", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(solver_json, "implicit_residuals", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(solver_json, "iteration_timestamps", - rapidjson::Value(rapidjson::kArrayType), allocator); - if (b->get_size()[1] == 1 && 
!FLAGS_overhead) { - auto rhs_norm = compute_norm2(b); - add_or_set_member(solver_json, "rhs_norm", rhs_norm, allocator); + std::clog << "Matrix is of size (" << state.system_matrix->get_size()[0] + << ", " << state.system_matrix->get_size()[1] << ")" + << std::endl; + test_case["rows"] = state.system_matrix->get_size()[0]; + test_case["cols"] = state.system_matrix->get_size()[1]; + return state; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + solver_benchmark_state& state, + const std::string& encoded_solver_name, + json& solver_case) const override + { + const auto decoded_pair = decoder.at(encoded_solver_name); + auto& solver_name = decoded_pair.first; + auto& precond_name = decoded_pair.second; + solver_case["recurrent_residuals"] = json::array(); + solver_case["true_residuals"] = json::array(); + solver_case["implicit_residuals"] = json::array(); + solver_case["iteration_timestamps"] = json::array(); + if (state.b->get_size()[1] == 1 && !FLAGS_overhead) { + auto rhs_norm = compute_norm2(state.b.get()); + solver_case["rhs_norm"] = rhs_norm; } for (auto stage : {"generate", "apply"}) { - add_or_set_member(solver_json, stage, - rapidjson::Value(rapidjson::kObjectType), - allocator); - add_or_set_member(solver_json[stage], "components", - rapidjson::Value(rapidjson::kObjectType), - allocator); + solver_case[stage] = json::object(); + solver_case[stage]["components"] = json::object(); } IterationControl ic{timer}; @@ -445,24 +483,24 @@ void solve_system(const std::string& solver_name, // warm run std::shared_ptr solver; for (auto _ : ic.warmup_run()) { - auto x_clone = clone(x); + auto x_clone = clone(state.x); auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_warmup_max_iters) - ->generate(system_matrix); - solver->apply(b, x_clone); + ->generate(state.system_matrix); + solver->apply(state.b, x_clone); exec->synchronize(); } // detail run if (FLAGS_detailed && !FLAGS_overhead) { // slow run, get the time of each functions - auto x_clone = clone(x); + auto x_clone = clone(state.x); { auto gen_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - solver_json["generate"]["components"], allocator, 1); + solver_case["generate"]["components"], 1); exec->add_logger(gen_logger); if (exec != exec->get_master()) { exec->get_master()->add_logger(gen_logger); @@ -471,7 +509,7 @@ void solve_system(const std::string& solver_name, auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_max_iters) - ->generate(system_matrix); + ->generate(state.system_matrix); exec->remove_logger(gen_logger); if (exec != exec->get_master()) { @@ -481,25 +519,22 @@ void solve_system(const std::string& solver_name, if (auto prec = dynamic_cast(solver.get())) { - add_or_set_member(solver_json, "preconditioner", - rapidjson::Value(rapidjson::kObjectType), - allocator); + solver_case["preconditioner"] = json::object(); write_precond_info( clone(exec->get_master(), prec->get_preconditioner()).get(), - solver_json["preconditioner"], allocator); + solver_case["preconditioner"]); } { auto apply_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - solver_json["apply"]["components"], allocator, 1); + solver_case["apply"]["components"], 1); exec->add_logger(apply_logger); if (exec != exec->get_master()) { exec->get_master()->add_logger(apply_logger); } - - solver->apply(b, x_clone); + solver->apply(state.b, x_clone); 
exec->remove_logger(apply_logger); if (exec != exec->get_master()) { @@ -508,17 +543,18 @@ void solve_system(const std::string& solver_name, } // slow run, gets the recurrent and true residuals of each iteration - if (b->get_size()[1] == 1) { - x_clone = clone(x); + if (state.b->get_size()[1] == 1) { + x_clone = clone(state.x); auto res_logger = std::make_shared>( - system_matrix, b, solver_json["recurrent_residuals"], - solver_json["true_residuals"], - solver_json["implicit_residuals"], - solver_json["iteration_timestamps"], allocator); + state.system_matrix, state.b, + solver_case["recurrent_residuals"], + solver_case["true_residuals"], + solver_case["implicit_residuals"], + solver_case["iteration_timestamps"]); solver->add_logger(res_logger); - solver->apply(b, x_clone); + solver->apply(state.b, x_clone); if (!res_logger->has_implicit_res_norms()) { - solver_json.RemoveMember("implicit_residuals"); + solver_case.erase("implicit_residuals"); } } exec->synchronize(); @@ -528,16 +564,16 @@ void solve_system(const std::string& solver_name, auto it_logger = std::make_shared(); auto generate_timer = get_timer(exec, FLAGS_gpu_timer); auto apply_timer = ic.get_timer(); - auto x_clone = clone(x); + auto x_clone = clone(state.x); for (auto status : ic.run(false)) { - x_clone = clone(x); + x_clone = clone(state.x); exec->synchronize(); generate_timer->tic(); auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_max_iters) - ->generate(system_matrix); + ->generate(state.system_matrix); generate_timer->toc(); exec->synchronize(); @@ -545,165 +581,33 @@ void solve_system(const std::string& solver_name, solver->add_logger(it_logger); } apply_timer->tic(); - solver->apply(b, x_clone); + solver->apply(state.b, x_clone); apply_timer->toc(); if (ic.get_num_repetitions() == 0) { solver->remove_logger(it_logger); } } - it_logger->write_data(solver_json["apply"], allocator); + it_logger->write_data(solver_case["apply"]); - if (b->get_size()[1] == 1 && !FLAGS_overhead) { + if (state.b->get_size()[1] == 1 && !FLAGS_overhead) { // a solver is considered direct if it didn't log any iterations - if (solver_json["apply"].HasMember("iterations") && - solver_json["apply"]["iterations"].GetInt() == 0) { - auto error = - compute_direct_error(solver.get(), b, x_clone.get()); - add_or_set_member(solver_json, "forward_error", error, - allocator); - } - auto residual = - compute_residual_norm(system_matrix.get(), b, x_clone.get()); - add_or_set_member(solver_json, "residual_norm", residual, - allocator); - } - add_or_set_member(solver_json["generate"], "time", - generate_timer->compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(solver_json["apply"], "time", - apply_timer->compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(solver_json, "repetitions", - apply_timer->get_num_repetitions(), allocator); - - // compute and write benchmark data - add_or_set_member(solver_json, "completed", true, allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["solver"][precond_solver_name], "completed", - false, allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["solver"][precond_solver_name], "error", - msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; - } -} - - -template -void run_solver_benchmarks(std::shared_ptr exec, - 
std::shared_ptr timer, - rapidjson::Document& test_cases, - const SystemGenerator& system_generator, - bool do_print) -{ - auto solvers = split(FLAGS_solvers, ','); - auto preconds = split(FLAGS_preconditioners, ','); - std::vector precond_solvers; - for (const auto& s : solvers) { - for (const auto& p : preconds) { - precond_solvers.push_back(s + (p == "none" ? "" : "-" + p)); - } - } - - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = annotate_functor{profiler_hook}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("solver")) { - test_case.AddMember("solver", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& solver_case = test_case["solver"]; - if (!FLAGS_overwrite && - all_of(begin(precond_solvers), end(precond_solvers), - [&solver_case](const std::string& s) { - return solver_case.HasMember(s.c_str()); - })) { - continue; - } - // annotate the test case - auto test_case_range = - annotate(system_generator.describe_config(test_case)); - - if (do_print) { - std::clog << "Running test case\n" << test_case << std::endl; - } - - using Vec = typename SystemGenerator::Vec; - std::shared_ptr system_matrix; - std::unique_ptr b; - std::unique_ptr x; - if (FLAGS_overhead) { - system_matrix = system_generator.initialize({1.0}, exec); - b = system_generator.initialize( - {std::numeric_limits::quiet_NaN()}, exec); - x = system_generator.initialize({0.0}, exec); - } else { - system_matrix = - system_generator.generate_matrix_with_optimal_format( - exec, test_case); - b = system_generator.generate_rhs(exec, system_matrix.get(), - test_case); - x = system_generator.generate_initial_guess( - exec, system_matrix.get(), b.get()); - } - - if (do_print) { - std::clog << "Matrix is of size (" - << system_matrix->get_size()[0] << ", " - << system_matrix->get_size()[1] << ")" << std::endl; - } - add_or_set_member(test_case, "size", system_matrix->get_size()[0], - allocator); - auto precond_solver_name = begin(precond_solvers); - for (const auto& solver_name : solvers) { - auto solver_range = annotate(solver_name.c_str()); - for (const auto& precond_name : preconds) { - if (do_print) { - std::clog - << "\tRunning solver: " << *precond_solver_name - << std::endl; - } - { - auto precond_range = annotate(precond_name.c_str()); - solve_system(solver_name, precond_name, - precond_solver_name->c_str(), exec, timer, - system_matrix, b.get(), x.get(), test_case, - allocator); - } - if (do_print) { - backup_results(test_cases); - } - ++precond_solver_name; - } - } - } catch (const std::exception& e) { - std::cerr << "Error setting up solver, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); + if (solver_case["apply"].contains("iterations") && + solver_case["apply"]["iterations"].get() == 0) { + auto error = compute_direct_error(solver.get(), state.b.get(), + x_clone.get()); + solver_case["forward_error"] = error; } + auto residual = compute_residual_norm(state.system_matrix.get(), + state.b.get(), x_clone.get()); + solver_case["residual_norm"] = residual; } + solver_case["generate"]["time"] = + generate_timer->compute_time(FLAGS_timer_method); + solver_case["apply"]["time"] = + apply_timer->compute_time(FLAGS_timer_method); + 
solver_case["repetitions"] = apply_timer->get_num_repetitions(); } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } -} +}; #endif // GINKGO_BENCHMARK_SOLVER_SOLVER_COMMON_HPP diff --git a/benchmark/sparse_blas/operations.cpp b/benchmark/sparse_blas/operations.cpp index 66e5707c559..2ee766d4f83 100644 --- a/benchmark/sparse_blas/operations.cpp +++ b/benchmark/sparse_blas/operations.cpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/sparse_blas/operations.hpp" -#include "benchmark/utils/json.hpp" #include "core/factorization/elimination_forest.hpp" #include "core/factorization/symbolic.hpp" #include "core/matrix/csr_kernels.hpp" @@ -632,11 +631,9 @@ class SymbolicLuOperation : public BenchmarkOperation { void run() override { gko::factorization::symbolic_lu(mtx_, result_); } - void write_stats(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& allocator) override + void write_stats(json& object) override { - add_or_set_member(object, "factor_nonzeros", - result_->get_num_stored_elements(), allocator); + object["factor_nonzeros"] = result_->get_num_stored_elements(); } private: @@ -680,11 +677,9 @@ class SymbolicCholeskyOperation : public BenchmarkOperation { forest_); } - void write_stats(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& allocator) override + void write_stats(json& object) override { - add_or_set_member(object, "factor_nonzeros", - result_->get_num_stored_elements(), allocator); + object["factor_nonzeros"] = result_->get_num_stored_elements(); } private: diff --git a/benchmark/sparse_blas/operations.hpp b/benchmark/sparse_blas/operations.hpp index 99cf72b8e59..48034eb8a1f 100644 --- a/benchmark/sparse_blas/operations.hpp +++ b/benchmark/sparse_blas/operations.hpp @@ -36,9 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include - - +#include "benchmark/utils/json.hpp" #include "benchmark/utils/types.hpp" @@ -79,9 +77,7 @@ class BenchmarkOperation { /** * Allows the operation to write arbitrary information to the JSON output. */ - virtual void write_stats(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& allocator) - {} + virtual void write_stats(json& object) {} }; diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index 8c054709fdf..21df4d9c448 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -47,7 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/sparse_blas/operations.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" +#include "benchmark/utils/iteration_control.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/types.hpp" #include "core/test/utils/matrix_generator.hpp" @@ -74,18 +75,64 @@ DEFINE_bool(validate, false, "against the ReferenceExecutor solution."); -void apply_sparse_blas(const char* operation_name, - std::shared_ptr exec, const Mtx* mtx, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - add_or_set_member(test_case, operation_name, - rapidjson::Value(rapidjson::kObjectType), allocator); +using Generator = DefaultSystemGenerator<>; + + +struct SparseBlasBenchmark : Benchmark> { + std::string name; + std::vector operations; + + SparseBlasBenchmark() + : name{"sparse_blas"}, operations{split(FLAGS_operations)} + {} + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return operations; + } + + bool should_print() const override { return true; } + + bool validate_config(const json& value) const override + { + return Generator::validate_config(value); + } + + std::string get_example_config() const override + { + return Generator::get_example_config(); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } + + std::unique_ptr setup(std::shared_ptr exec, + json& test_case) const override + { + auto data = Generator::generate_matrix_data(test_case); + data.ensure_row_major_order(); + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + + auto mtx = Mtx::create(exec, data.size, data.nonzeros.size()); + mtx->read(data); + return mtx; + } + - auto op = get_operation(operation_name, mtx); + void run(std::shared_ptr exec, std::shared_ptr timer, + std::unique_ptr& mtx, const std::string& operation_name, + json& operation_case) const override + { + auto op = get_operation(operation_name, mtx.get()); - auto timer = get_timer(exec, FLAGS_gpu_timer); IterationControl ic(timer); // warm run @@ -105,54 +152,30 @@ void apply_sparse_blas(const char* operation_name, const auto flops = static_cast(op->get_flops()); const auto mem = static_cast(op->get_memory()); const auto repetitions = ic.get_num_repetitions(); - add_or_set_member(test_case[operation_name], "time", runtime, - allocator); - add_or_set_member(test_case[operation_name], "flops", flops / runtime, - allocator); - add_or_set_member(test_case[operation_name], "bandwidth", mem / runtime, - allocator); - add_or_set_member(test_case[operation_name], "repetitions", repetitions, - allocator); + operation_case["time"] = runtime; + operation_case["flops"] = flops / runtime; + operation_case["bandwidth"] = mem / runtime; + operation_case["repetitions"] = repetitions; if (FLAGS_validate) { auto validation_result = op->validate(); - add_or_set_member(test_case[operation_name], "correct", - validation_result.first, allocator); - add_or_set_member(test_case[operation_name], "error", - validation_result.second, allocator); + operation_case["correct"] = validation_result.first; + operation_case["error"] = validation_result.second; } if (FLAGS_detailed) { - 
add_or_set_member(test_case[operation_name], "components", - rapidjson::Value(rapidjson::kObjectType), - allocator); + operation_case["components"] = json::object(); auto gen_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - test_case[operation_name]["components"], allocator, - repetitions); + operation_case["components"], repetitions); exec->add_logger(gen_logger); for (unsigned i = 0; i < repetitions; i++) { op->run(); } exec->remove_logger(gen_logger); } - op->write_stats(test_case[operation_name], allocator); - - add_or_set_member(test_case[operation_name], "completed", true, - allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case[operation_name], "completed", false, - allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case[operation_name], "error", msg_value, - allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; + op->write_stats(operation_case); } -} +}; int main(int argc, char* argv[]) @@ -160,86 +183,18 @@ int main(int argc, char* argv[]) std::string header = "A benchmark for measuring performance of Ginkgo's sparse BLAS " "operations.\n"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing_matrix(&argc, &argv, header, format); auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(get_input_stream()); std::string extra_information = "The operations are " + FLAGS_operations; print_general_information(extra_information); - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = annotate_functor{profiler_hook}; - - auto operations = split(FLAGS_operations, ','); - - DefaultSystemGenerator<> generator{}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember(benchmark_name)) { - test_case.AddMember(rapidjson::Value(benchmark_name, allocator), - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& sp_blas_case = test_case[benchmark_name]; - std::clog << "Running test case\n" << test_case << std::endl; - auto data = generator.generate_matrix_data(test_case); - data.ensure_row_major_order(); - std::clog << "Matrix is of size (" << data.size[0] << ", " - << data.size[1] << "), " << data.nonzeros.size() - << std::endl; - add_or_set_member(test_case, "rows", data.size[0], allocator); - add_or_set_member(test_case, "cols", data.size[1], allocator); - add_or_set_member(test_case, "nonzeros", data.nonzeros.size(), - allocator); - - auto mtx = Mtx::create(exec, data.size, data.nonzeros.size()); - mtx->read(data); - // annotate the test case - auto test_case_range = - annotate(generator.describe_config(test_case)); - for (const auto& operation_name : operations) { - if (FLAGS_overwrite || - !sp_blas_case.HasMember(operation_name.c_str())) { - { - auto operation_range = annotate(operation_name.c_str()); - apply_sparse_blas(operation_name.c_str(), exec, - mtx.get(), sp_blas_case, allocator); - } - std::clog << "Current state:" << std::endl - << test_cases << 
std::endl; - backup_results(test_cases); - } - } - // write the output if we have no strategies - backup_results(test_cases); - } catch (const std::exception& e) { - std::cerr << "Error setting up matrix data, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); - } - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } + run_test_cases(SparseBlasBenchmark{}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/spmv/distributed/spmv.cpp b/benchmark/spmv/distributed/spmv.cpp index 9b7e4ad8c8f..202aad15c7e 100644 --- a/benchmark/spmv/distributed/spmv.cpp +++ b/benchmark/spmv/distributed/spmv.cpp @@ -58,38 +58,7 @@ DEFINE_string(non_local_formats, "csr", "run. See the 'formats' option for a list of supported versions"); -std::string example_config = R"( - [ - {"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}, - {"filename": "my_file.mtx"} - ] -)"; - - -[[noreturn]] void print_config_error_and_exit() -{ - std::cerr << "Input has to be a JSON array of matrix configurations:\n" - << example_config << std::endl; - std::exit(1); -} - - -struct Generator : DistributedDefaultSystemGenerator> { - Generator(gko::experimental::mpi::communicator comm) - : DistributedDefaultSystemGenerator>{ - std::move(comm), {}} - {} - - void validate_options(const rapidjson::Value& options) const - { - if (!options.IsObject() || - !((options.HasMember("size") && options.HasMember("stencil") && - options.HasMember("comm_pattern")) || - options.HasMember("filename"))) { - print_config_error_and_exit(); - } - } -}; +using Generator = DistributedDefaultSystemGenerator>; int main(int argc, char* argv[]) @@ -98,18 +67,19 @@ int main(int argc, char* argv[]) const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); const auto rank = comm.rank(); + const auto do_print = rank == 0; std::string header = "A benchmark for measuring performance of Ginkgo's spmv.\n"; - std::string format = example_config; - initialize_argument_parsing_matrix(&argc, &argv, header, format); - - if (rank == 0) { - std::string extra_information = "The formats are [" + - FLAGS_local_formats + "]x[" + - FLAGS_non_local_formats + "]\n" + - "The number of right hand sides is " + - std::to_string(FLAGS_nrhs) + "\n"; + std::string format = Generator::get_example_config(); + initialize_argument_parsing_matrix(&argc, &argv, header, format, "", + do_print); + + if (do_print) { + std::string extra_information = + "The formats are [" + FLAGS_local_formats + "]x[" + + FLAGS_non_local_formats + "]\n" + + "The number of right hand sides is " + std::to_string(FLAGS_nrhs); print_general_information(extra_information); } @@ -125,16 +95,13 @@ int main(int argc, char* argv[]) } std::string json_input = broadcast_json_input(get_input_stream(), comm); - rapidjson::Document test_cases; - test_cases.Parse(json_input.c_str()); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(json_input); - run_spmv_benchmark(exec, test_cases, formats, Generator{comm}, - get_mpi_timer(exec, comm, FLAGS_gpu_timer), rank == 0); + run_test_cases(SpmvBenchmark{Generator{comm}, formats, do_print}, + exec, get_mpi_timer(exec, comm, FLAGS_gpu_timer), + test_cases); - if (rank == 0) { - std::cout << test_cases << std::endl; + if (do_print) { + 
std::cout << std::setw(4) << test_cases << std::endl; } } diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp index 034437907c8..abd1b783019 100644 --- a/benchmark/spmv/spmv.cpp +++ b/benchmark/spmv/spmv.cpp @@ -41,48 +41,29 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" -struct Generator : DefaultSystemGenerator<> { - void validate_options(const rapidjson::Value& options) const - { - if (!options.IsObject() || - !((options.HasMember("size") && options.HasMember("stencil")) || - options.HasMember("filename"))) { - std::cerr - << "Input has to be a JSON array of matrix configurations:\n" - << example_config << std::endl; - std::exit(1); - } - } -}; +using Generator = DefaultSystemGenerator<>; int main(int argc, char* argv[]) { std::string header = "A benchmark for measuring performance of Ginkgo's spmv.\n"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing_matrix(&argc, &argv, header, format); std::string extra_information = "The formats are " + FLAGS_formats + "\nThe number of right hand sides is " + - std::to_string(FLAGS_nrhs) + "\n"; + std::to_string(FLAGS_nrhs); print_general_information(extra_information); auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - auto formats = split(FLAGS_formats, ','); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(get_input_stream()); - run_spmv_benchmark(exec, test_cases, formats, Generator{}, - get_timer(exec, FLAGS_gpu_timer), true); + run_test_cases(SpmvBenchmark{Generator{}, split(FLAGS_formats)}, + exec, get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index 3c8d886df3b..4a7d014de8b 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -36,7 +36,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" #ifdef GINKGO_BENCHMARK_ENABLE_TUNING @@ -48,57 +50,119 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
DEFINE_uint32(nrhs, 1, "The number of right hand sides"); -// This function supposes that management of `FLAGS_overwrite` is done before -// calling it -template -void apply_spmv(const char* format_name, std::shared_ptr exec, - const Generator& generator, std::shared_ptr timer, - const gko::matrix_data& data, - const VectorType* b, const VectorType* x, - const VectorType* answer, rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& spmv_case = test_case["spmv"]; - add_or_set_member(spmv_case, format_name, - rapidjson::Value(rapidjson::kObjectType), allocator); +template +struct spmv_benchmark_state { + gko::matrix_data data; + std::unique_ptr x; + std::unique_ptr b; + std::unique_ptr answer; +}; + + +template +struct SpmvBenchmark : Benchmark> { + using Vec = typename Generator::Vec; + std::string name; + std::vector formats; + bool do_print; + Generator generator; + + SpmvBenchmark(Generator generator, std::vector formats, + bool do_print = true) + : name{"spmv"}, + formats{std::move(formats)}, + generator{generator}, + do_print{do_print} + {} + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return formats; + } + + bool should_print() const override { return do_print; } + std::string get_example_config() const override + { + return generator.get_example_config(); + } + + bool validate_config(const json& test_case) const override + { + return generator.validate_config(test_case); + } + + std::string describe_config(const json& test_case) const override + { + return generator.describe_config(test_case); + } + + spmv_benchmark_state setup(std::shared_ptr exec, + json& test_case) const override + { + spmv_benchmark_state state; + state.data = generator.generate_matrix_data(test_case); + + auto nrhs = FLAGS_nrhs; + state.b = generator.create_multi_vector_random( + exec, gko::dim<2>{state.data.size[1], nrhs}); + state.x = generator.create_multi_vector_random( + exec, gko::dim<2>{state.data.size[0], nrhs}); + if (do_print) { + std::clog << "Matrix is of size (" << state.data.size[0] << ", " + << state.data.size[1] << "), " + << state.data.nonzeros.size() << std::endl; + } + test_case["rows"] = state.data.size[0]; + test_case["cols"] = state.data.size[1]; + test_case["nonzeros"] = state.data.nonzeros.size(); + if (FLAGS_detailed) { + state.answer = gko::clone(state.x); + auto system_matrix = + generator.generate_matrix_with_default_format(exec, state.data); + exec->synchronize(); + system_matrix->apply(state.b, state.answer); + exec->synchronize(); + } + return state; + } + + void run(std::shared_ptr exec, std::shared_ptr timer, + spmv_benchmark_state& state, + const std::string& format_name, json& format_case) const override + { auto system_matrix = generator.generate_matrix_with_format( - exec, format_name, data, &spmv_case[format_name], &allocator); + exec, format_name, state.data, &format_case); // check the residual if (FLAGS_detailed) { - auto x_clone = clone(x); + auto x_clone = clone(state.x); exec->synchronize(); - system_matrix->apply(b, x_clone); + system_matrix->apply(state.b, x_clone); exec->synchronize(); auto max_relative_norm2 = - compute_max_relative_norm2(x_clone.get(), answer); - add_or_set_member(spmv_case[format_name], "max_relative_norm2", - max_relative_norm2, allocator); + compute_max_relative_norm2(x_clone.get(), state.answer.get()); + format_case["max_relative_norm2"] = max_relative_norm2; } IterationControl ic{timer}; // warm run for (auto _ : 
ic.warmup_run()) { - auto x_clone = clone(x); + auto x_clone = clone(state.x); exec->synchronize(); - system_matrix->apply(b, x_clone); + system_matrix->apply(state.b, x_clone); exec->synchronize(); } // tuning run #ifdef GINKGO_BENCHMARK_ENABLE_TUNING auto& format_case = spmv_case[format_name]; - if (!format_case.HasMember("tuning")) { - format_case.AddMember( - "tuning", rapidjson::Value(rapidjson::kObjectType), allocator); - } + format_case["tuning"] = json::object(); auto& tuning_case = format_case["tuning"]; - add_or_set_member(tuning_case, "time", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(tuning_case, "values", - rapidjson::Value(rapidjson::kArrayType), allocator); + tuning_case["time"] = json::array(); + tuning_case["values"] = json::array(); // Enable tuning for this portion of code gko::_tuning_flag = true; @@ -112,13 +176,13 @@ void apply_spmv(const char* format_name, std::shared_ptr exec, gko::_tuned_value = val; auto tuning_timer = get_timer(exec, FLAGS_gpu_timer); IterationControl ic_tuning{tuning_timer}; - auto x_clone = clone(x); + auto x_clone = clone(state.x); for (auto _ : ic_tuning.run()) { - system_matrix->apply(b, x_clone); + system_matrix->apply(state.b, x_clone); } - tuning_case["time"].PushBack( - ic_tuning.compute_time(FLAGS_timer_method), allocator); - tuning_case["values"].PushBack(val, allocator); + tuning_case["time"].push_back( + ic_tuning.compute_time(FLAGS_timer_method)); + tuning_case["values"].push_back(val); } // We put back the flag to false to use the default (non-tuned) values // for the following @@ -126,142 +190,41 @@ void apply_spmv(const char* format_name, std::shared_ptr exec, #endif // GINKGO_BENCHMARK_ENABLE_TUNING // timed run - auto x_clone = clone(x); + auto x_clone = clone(state.x); for (auto _ : ic.run()) { - system_matrix->apply(b, x_clone); - } - add_or_set_member(spmv_case[format_name], "time", - ic.compute_time(FLAGS_timer_method), allocator); - add_or_set_member(spmv_case[format_name], "repetitions", - ic.get_num_repetitions(), allocator); - - // compute and write benchmark data - add_or_set_member(spmv_case[format_name], "completed", true, allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["spmv"][format_name], "completed", false, - allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["spmv"][format_name], "error", - msg_value, allocator); + system_matrix->apply(state.b, x_clone); } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; + format_case["time"] = ic.compute_time(FLAGS_timer_method); + format_case["repetitions"] = ic.get_num_repetitions(); } -} - - -template -void run_spmv_benchmark(std::shared_ptr exec, - rapidjson::Document& test_cases, - const std::vector formats, - const SystemGenerator& system_generator, - std::shared_ptr timer, bool do_print) -{ - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = annotate_functor{profiler_hook}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - system_generator.validate_options(test_case); - if (!test_case.HasMember("spmv")) { - test_case.AddMember("spmv", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& spmv_case = test_case["spmv"]; - if (!FLAGS_overwrite && - all_of(begin(formats), 
end(formats), - [&spmv_case](const std::string& s) { - return spmv_case.HasMember(s.c_str()); - })) { - continue; - } - if (do_print) { - std::clog << "Running test case\n" << test_case << std::endl; - } - // annotate the test case - auto test_case_range = - annotate(system_generator.describe_config(test_case)); - - auto data = system_generator.generate_matrix_data(test_case); - - auto nrhs = FLAGS_nrhs; - auto b = system_generator.create_multi_vector_random( - exec, gko::dim<2>{data.size[1], nrhs}); - auto x = system_generator.create_multi_vector_random( - exec, gko::dim<2>{data.size[0], nrhs}); - if (do_print) { - std::clog << "Matrix is of size (" << data.size[0] << ", " - << data.size[1] << ")" << std::endl; - } - add_or_set_member(test_case, "size", data.size[0], allocator); - add_or_set_member(test_case, "nnz", data.nonzeros.size(), - allocator); - auto best_performance = std::numeric_limits::max(); - if (!test_case.HasMember("optimal")) { - test_case.AddMember("optimal", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - // Compute the result from ginkgo::coo as the correct answer - auto answer = gko::clone(x); - if (FLAGS_detailed) { - auto system_matrix = - system_generator.generate_matrix_with_default_format(exec, - data); - exec->synchronize(); - system_matrix->apply(b, answer); - exec->synchronize(); + void postprocess(json& test_case) const override + { + if (!test_case.contains("optimal")) { + test_case["optimal"] = json::object(); + } + auto best_time = std::numeric_limits::max(); + std::string best_format; + // find the fastest among all formats we tested + for (const auto& format : formats) { + if (!test_case[name].contains(format)) { + continue; } - for (const auto& format_name : formats) { - { - auto format_range = annotate(format_name.c_str()); - apply_spmv(format_name.c_str(), exec, system_generator, - timer, data, b.get(), x.get(), answer.get(), - test_case, allocator); - } - if (do_print) { - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - } - if (spmv_case[format_name.c_str()]["completed"].GetBool()) { - auto performance = - spmv_case[format_name.c_str()]["time"].GetDouble(); - if (performance < best_performance) { - best_performance = performance; - add_or_set_member( - test_case["optimal"], "spmv", - rapidjson::Value(format_name.c_str(), allocator) - .Move(), - allocator); - } - } - if (do_print) { - backup_results(test_cases); + auto& format_case = test_case[name][format]; + if (format_case.contains("completed") && + format_case["completed"].template get()) { + auto time = format_case["time"]; + if (time < best_time) { + best_time = time; + best_format = format; } } - } catch (const std::exception& e) { - std::cerr << "Error setting up matrix data, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); - } + } + if (!best_format.empty()) { + test_case["optimal"][name] = best_format; } } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } -} +}; + #endif // GINKGO_BENCHMARK_SPMV_SPMV_COMMON_HPP diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index abc496b0921..b64f4321287 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -10,6 +10,7 @@ Running test case "blas": {} } DEBUG: begin n = 100 + Running blas: copy DEBUG: begin copy DEBUG: begin allocate 
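The postprocess hook added to spmv_common.hpp above replaces the inline best-format bookkeeping of the old driver: after all formats have run, it walks the per-format results, keeps the fastest completed one, and records it under test_case["optimal"]. A self-contained sketch of that selection, assuming nlohmann::json and using made-up timings, might look like this; pick_optimal is a hypothetical stand-in for SpmvBenchmark::postprocess.

    #include <iomanip>
    #include <iostream>
    #include <limits>
    #include <string>
    #include <vector>

    #include <nlohmann/json.hpp>

    using json = nlohmann::json;

    // Hypothetical stand-in for SpmvBenchmark::postprocess: pick the fastest
    // completed format and record it under test_case["optimal"]["spmv"].
    void pick_optimal(json& test_case, const std::vector<std::string>& formats)
    {
        if (!test_case.contains("optimal")) {
            test_case["optimal"] = json::object();
        }
        auto best_time = std::numeric_limits<double>::max();
        std::string best_format;
        for (const auto& format : formats) {
            if (!test_case["spmv"].contains(format)) {
                continue;
            }
            auto& format_case = test_case["spmv"][format];
            if (format_case.contains("completed") &&
                format_case["completed"].get<bool>()) {
                auto time = format_case["time"].get<double>();
                if (time < best_time) {
                    best_time = time;
                    best_format = format;
                }
            }
        }
        if (!best_format.empty()) {
            test_case["optimal"]["spmv"] = best_format;
        }
    }

    int main()
    {
        // Made-up timings, purely for illustration.
        json test_case;
        test_case["spmv"]["coo"] = {{"time", 2.0}, {"completed", true}};
        test_case["spmv"]["csr"] = {{"time", 1.0}, {"completed", true}};
        pick_optimal(test_case, {"coo", "csr"});
        std::cout << std::setw(4) << test_case << std::endl;  // optimal.spmv == "csr"
    }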
DEBUG: end allocate @@ -24,21 +25,7 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end copy -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] + Running blas: axpy DEBUG: begin axpy DEBUG: begin allocate DEBUG: end allocate @@ -61,28 +48,7 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end axpy -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] + Running blas: scal DEBUG: begin scal DEBUG: begin allocate DEBUG: end allocate @@ -99,33 +65,4 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end scal -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "scal": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] DEBUG: end n = 100 diff --git a/benchmark/test/reference/blas.simple.stderr b/benchmark/test/reference/blas.simple.stderr index 9508b0dcf1e..f41b25c6ee1 100644 --- a/benchmark/test/reference/blas.simple.stderr +++ b/benchmark/test/reference/blas.simple.stderr @@ -9,69 +9,6 @@ Running test case "n": 100, "blas": {} } -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "scal": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] + Running blas: copy + Running blas: axpy + Running blas: scal diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr index 9ab8a899649..1d5df7477ba 100644 --- a/benchmark/test/reference/conversion.all.stderr +++ b/benchmark/test/reference/conversion.all.stderr @@ -4,1853 +4,23 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr,ell,sellp,hybrid -Benchmarking conversions. 
Running test case { "size": 100, "stencil": "7pt", - "conversions": {} -} -Matrix is of size (125, 125) -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 
1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": 
false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": 
false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - 
"repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { 
- "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - }, - "hybrid-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - 
"csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - }, - "hybrid-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "hybrid-ell": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - }, - "hybrid-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "hybrid-ell": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": 
false, - "error": "" - }, - "hybrid-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "hybrid-ell": { - "completed": false, - "error": "" - }, - "hybrid-sellp": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - }, - "hybrid-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "hybrid-ell": { - "completed": false, - "error": "" - }, - "hybrid-sellp": { - "completed": false, - "error": "" - } - } - } -] + "conversion": {} +} +Matrix is of size (125, 125), 725 + Running conversion: coo-read + Running conversion: coo-csr + Running conversion: csr-read + Running conversion: csr-coo + Running conversion: csr-ell + Running conversion: csr-sellp + Running conversion: csr-hybrid + Running conversion: ell-read + Running conversion: ell-csr + Running conversion: sellp-read + Running conversion: sellp-csr + Running conversion: hybrid-read + Running conversion: hybrid-csr diff --git a/benchmark/test/reference/conversion.all.stdout b/benchmark/test/reference/conversion.all.stdout index cb53bb81a6c..c4b657a42c4 100644 --- a/benchmark/test/reference/conversion.all.stdout +++ b/benchmark/test/reference/conversion.all.stdout @@ -1,25 +1,23 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", - "conversions": { - "coo-csr": { + "conversion": { + "coo-read": { "time": 1.0, "repetitions": 10, "completed": true }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true }, - "coo-hybrid": { - "completed": false, - "error": "" + "csr-read": { + "time": 1.0, + "repetitions": 10, + "completed": true }, "csr-coo": { "time": 1.0, @@ -41,57 +39,39 @@ "repetitions": 10, "completed": true }, - "ell-coo": { - "completed": false, - "error": "" + "ell-read": { + "time": 1.0, + "repetitions": 10, + "completed": true }, "ell-csr": { "time": 1.0, "repetitions": 10, "completed": true }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" + "sellp-read": { + "time": 1.0, + "repetitions": 10, + "completed": true }, "sellp-csr": { "time": 1.0, "repetitions": 10, "completed": true }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - 
"completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" + "hybrid-read": { + "time": 1.0, + "repetitions": 10, + "completed": true }, "hybrid-csr": { "time": 1.0, "repetitions": 10, "completed": true - }, - "hybrid-ell": { - "completed": false, - "error": "" - }, - "hybrid-sellp": { - "completed": false, - "error": "" } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/conversion.matrix.stderr b/benchmark/test/reference/conversion.matrix.stderr index 1d604175479..369a363a53e 100644 --- a/benchmark/test/reference/conversion.matrix.stderr +++ b/benchmark/test/reference/conversion.matrix.stderr @@ -4,43 +4,13 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Benchmarking conversions. Running test case { "filename": "", - "conversions": {} + "conversion": {} } -Matrix is of size (36, 36) -Current state: -[ - { - "filename": "", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - }, - "size": 36 - } -] -Current state: -[ - { - "filename": "", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - }, - "size": 36 - } -] +Matrix is of size (36, 36), 208 + Running conversion: coo-read + Running conversion: coo-csr + Running conversion: csr-read + Running conversion: csr-coo diff --git a/benchmark/test/reference/conversion.matrix.stdout b/benchmark/test/reference/conversion.matrix.stdout index e43edda0595..7e537fa4919 100644 --- a/benchmark/test/reference/conversion.matrix.stdout +++ b/benchmark/test/reference/conversion.matrix.stdout @@ -2,18 +2,30 @@ [ { "filename": "", - "conversions": { + "conversion": { + "coo-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, "coo-csr": { "time": 1.0, "repetitions": 10, "completed": true }, + "csr-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, "csr-coo": { "time": 1.0, "repetitions": 10, "completed": true } }, - "size": 36 + "rows": 36, + "cols": 36, + "nonzeros": 208 } ] diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 6733472be8f..089e6be02f9 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -4,15 +4,16 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Benchmarking conversions. 
Running test case { "size": 100, "stencil": "7pt", - "conversions": {} + "conversion": {} } -Matrix is of size (125, 125) -DEBUG: begin stencil(125,7pt) +Matrix is of size (125, 125), 725 +DEBUG: begin stencil(100,7pt) + Running conversion: coo-read +DEBUG: begin coo-read DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -21,13 +22,17 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end coo-read + Running conversion: coo-csr DEBUG: begin coo-csr DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -36,12 +41,8 @@ DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin components::convert_idxs_to_ptrs -DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free +DEBUG: begin components::fill_array +DEBUG: end components::fill_array DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate @@ -49,14 +50,10 @@ DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs @@ -68,27 +65,15 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end coo-csr -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: end coo-csr + Running conversion: csr-read +DEBUG: begin csr-read DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array @@ -109,32 +94,46 @@ DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin free DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end csr-read + Running conversion: csr-coo DEBUG: begin csr-coo DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_ptrs_to_idxs DEBUG: end components::convert_ptrs_to_idxs DEBUG: end copy() @@ -144,30 +143,11 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end csr-coo -Current state: -[ - { - "size": 125, 
- "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 1, - "completed": true - }, - "csr-coo": { - "time": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(125,7pt) +DEBUG: end csr-coo +DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/conversion.profile.stdout b/benchmark/test/reference/conversion.profile.stdout index 3e76bc26934..b29815f6c17 100644 --- a/benchmark/test/reference/conversion.profile.stdout +++ b/benchmark/test/reference/conversion.profile.stdout @@ -1,19 +1,32 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", - "conversions": { + "conversion": { + "coo-read": { + "time": 1.0, + "repetitions": 1, + "completed": true + }, "coo-csr": { "time": 1.0, "repetitions": 1, "completed": true }, + "csr-read": { + "time": 1.0, + "repetitions": 1, + "completed": true + }, "csr-coo": { "time": 1.0, "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/conversion.simple.stderr b/benchmark/test/reference/conversion.simple.stderr index d221ead12a4..a814dba6888 100644 --- a/benchmark/test/reference/conversion.simple.stderr +++ b/benchmark/test/reference/conversion.simple.stderr @@ -4,44 +4,14 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Benchmarking conversions. Running test case { "size": 100, "stencil": "7pt", - "conversions": {} + "conversion": {} } -Matrix is of size (125, 125) -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] +Matrix is of size (125, 125), 725 + Running conversion: coo-read + Running conversion: coo-csr + Running conversion: csr-read + Running conversion: csr-coo diff --git a/benchmark/test/reference/conversion.simple.stdout b/benchmark/test/reference/conversion.simple.stdout index 9ecdd46f5e1..856f1330eea 100644 --- a/benchmark/test/reference/conversion.simple.stdout +++ b/benchmark/test/reference/conversion.simple.stdout @@ -1,19 +1,32 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", - "conversions": { + "conversion": { + "coo-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, "coo-csr": { "time": 1.0, "repetitions": 10, "completed": true }, + "csr-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, "csr-coo": { "time": 1.0, "repetitions": 10, "completed": true } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/distributed_solver.matrix.stdout b/benchmark/test/reference/distributed_solver.matrix.stdout index 34fdda13e55..cd3c7b8bd43 100644 --- a/benchmark/test/reference/distributed_solver.matrix.stdout +++ b/benchmark/test/reference/distributed_solver.matrix.stdout @@ -52,6 +52,7 @@ "completed": true } }, - "size": 36 + "rows": 36, + "cols": 36 } ] diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index efd79f66dc5..e583a1411a8 100644 --- 
a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -5,7 +5,6 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -DEBUG: begin stencil(100,7pt,stencil) Running test case { "size": 100, @@ -213,9 +212,9 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() Matrix is of size (125, 125) -DEBUG: begin cg +DEBUG: begin stencil(100,7pt,stencil) Running solver: cg -DEBUG: begin none +DEBUG: begin cg DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -670,8 +669,8 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end none DEBUG: end cg +DEBUG: end stencil(100,7pt,stencil) DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -686,4 +685,3 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt,stencil) diff --git a/benchmark/test/reference/distributed_solver.profile.stdout b/benchmark/test/reference/distributed_solver.profile.stdout index c61541a5d5b..aef92652256 100644 --- a/benchmark/test/reference/distributed_solver.profile.stdout +++ b/benchmark/test/reference/distributed_solver.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": { @@ -27,6 +27,8 @@ "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/distributed_solver.simple.stdout b/benchmark/test/reference/distributed_solver.simple.stdout index 54d7233ba77..002b9d91347 100644 --- a/benchmark/test/reference/distributed_solver.simple.stdout +++ b/benchmark/test/reference/distributed_solver.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": { @@ -53,6 +53,8 @@ "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/matrix_statistics.matrix.stderr b/benchmark/test/reference/matrix_statistics.matrix.stderr index af205c778c0..7bb33842f25 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stderr +++ b/benchmark/test/reference/matrix_statistics.matrix.stderr @@ -5,4 +5,4 @@ Running test case "filename": "", "problem": {} } -Matrix is of size (36, 36) +Matrix is of size (36, 36), 208 diff --git a/benchmark/test/reference/matrix_statistics.matrix.stdout b/benchmark/test/reference/matrix_statistics.matrix.stdout index a056241669b..ea73587fde4 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stdout +++ b/benchmark/test/reference/matrix_statistics.matrix.stdout @@ -33,6 +33,8 @@ "hyperflatness": 6.0545648993883665 } }, - "size": 36 + "rows": 36, + "cols": 36, + "nonzeros": 208 } ] diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr b/benchmark/test/reference/matrix_statistics.simple.stderr index 6b853c3f4ea..75a7cca709f 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stderr +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -6,4 +6,4 @@ Running test case "stencil": "7pt", "problem": {} } -Matrix is of size (125, 125) +Matrix is of size (125, 125), 725 diff --git a/benchmark/test/reference/matrix_statistics.simple.stdout b/benchmark/test/reference/matrix_statistics.simple.stdout index 4470784e7c5..13746ce8a46 100644 --- 
a/benchmark/test/reference/matrix_statistics.simple.stdout +++ b/benchmark/test/reference/matrix_statistics.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "problem": { "rows": 125, @@ -33,6 +33,9 @@ "hyperskewness": -1.741577812922432, "hyperflatness": 7.762345679012379 } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/preconditioner.matrix.stderr b/benchmark/test/reference/preconditioner.matrix.stderr index c9ef583d79e..4088a20c925 100644 --- a/benchmark/test/reference/preconditioner.matrix.stderr +++ b/benchmark/test/reference/preconditioner.matrix.stderr @@ -9,34 +9,5 @@ Running test case "filename": "", "preconditioner": {} } -Matrix is of size (36, 36) -Current state: -[ - { - "filename": "", - "preconditioner": { - "none": { - "generate": { - "components": { - "generate()": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 10 - }, - "apply": { - "components": { - "apply()": 1.0, - "copy()": 1.0, - "dense::copy": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 10 - }, - "completed": true - } - }, - "size": 36 - } -] +Matrix is of size (36, 36), 208 + Running preconditioner: none diff --git a/benchmark/test/reference/preconditioner.matrix.stdout b/benchmark/test/reference/preconditioner.matrix.stdout index 77979f4c54b..0415a87ea8d 100644 --- a/benchmark/test/reference/preconditioner.matrix.stdout +++ b/benchmark/test/reference/preconditioner.matrix.stdout @@ -25,6 +25,8 @@ "completed": true } }, - "size": 36 + "rows": 36, + "cols": 36, + "nonzeros": 208 } ] diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index 5b47bc9bd94..c215b22c925 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -10,7 +10,6 @@ Running test case "stencil": "7pt", "preconditioner": {} } -DEBUG: begin stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array @@ -59,7 +58,9 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -Matrix is of size (125, 125) +Matrix is of size (125, 125), 725 +DEBUG: begin stencil(100,7pt) + Running preconditioner: none DEBUG: begin none DEBUG: begin copy() DEBUG: begin allocate @@ -78,28 +79,7 @@ DEBUG: end apply() DEBUG: begin free DEBUG: end free DEBUG: end none -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "preconditioner": { - "none": { - "generate": { - "components": {}, - "time": 1.0, - "repetitions": 1 - }, - "apply": { - "components": {}, - "time": 1.0, - "repetitions": 1 - }, - "completed": true - } - } - } -] +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -110,4 +90,3 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/preconditioner.profile.stdout b/benchmark/test/reference/preconditioner.profile.stdout index cc73c4c4552..f53407d818d 100644 --- a/benchmark/test/reference/preconditioner.profile.stdout +++ b/benchmark/test/reference/preconditioner.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "preconditioner": { "none": { @@ -17,6 +17,9 @@ }, "completed": true } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/preconditioner.simple.stderr 
b/benchmark/test/reference/preconditioner.simple.stderr index d480d4fedbd..07d2cca6704 100644 --- a/benchmark/test/reference/preconditioner.simple.stderr +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -10,34 +10,5 @@ Running test case "stencil": "7pt", "preconditioner": {} } -Matrix is of size (125, 125) -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "preconditioner": { - "none": { - "generate": { - "components": { - "generate()": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 10 - }, - "apply": { - "components": { - "apply()": 1.0, - "copy()": 1.0, - "dense::copy": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 10 - }, - "completed": true - } - } - } -] +Matrix is of size (125, 125), 725 + Running preconditioner: none diff --git a/benchmark/test/reference/preconditioner.simple.stdout b/benchmark/test/reference/preconditioner.simple.stdout index c47146a72e1..92bb51ddb57 100644 --- a/benchmark/test/reference/preconditioner.simple.stdout +++ b/benchmark/test/reference/preconditioner.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "preconditioner": { "none": { @@ -25,6 +25,9 @@ }, "completed": true } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/solver.matrix.stdout b/benchmark/test/reference/solver.matrix.stdout index 6a1f8ceb959..56577288c2d 100644 --- a/benchmark/test/reference/solver.matrix.stdout +++ b/benchmark/test/reference/solver.matrix.stdout @@ -50,6 +50,7 @@ "completed": true } }, - "size": 36 + "rows": 36, + "cols": 36 } ] diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 65b7560d936..0c3f7060796 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -5,7 +5,6 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -DEBUG: begin stencil(100,7pt) Running test case { "size": 100, @@ -62,9 +61,9 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() Matrix is of size (125, 125) -DEBUG: begin cg +DEBUG: begin stencil(100,7pt) Running solver: cg -DEBUG: begin none +DEBUG: begin cg DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -425,8 +424,8 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end none DEBUG: end cg +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -437,4 +436,3 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/solver.profile.stdout b/benchmark/test/reference/solver.profile.stdout index 128a8a1f169..0148e6ef092 100644 --- a/benchmark/test/reference/solver.profile.stdout +++ b/benchmark/test/reference/solver.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "optimal": { "spmv": "csr" @@ -26,6 +26,8 @@ "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/solver.simple.stdout b/benchmark/test/reference/solver.simple.stdout index c6055339d67..b4e7b56b2bf 100644 --- a/benchmark/test/reference/solver.simple.stdout +++ b/benchmark/test/reference/solver.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "optimal": { "spmv": "csr" @@ -50,6 
+50,8 @@ "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/sparse_blas.matrix.stderr b/benchmark/test/reference/sparse_blas.matrix.stderr index 5001c604e72..ff52b6a3269 100644 --- a/benchmark/test/reference/sparse_blas.matrix.stderr +++ b/benchmark/test/reference/sparse_blas.matrix.stderr @@ -3,34 +3,11 @@ This is Ginkgo 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 -The operations are transposeRunning test case +The operations are transpose +Running test case { "filename": "", "sparse_blas": {} } Matrix is of size (36, 36), 208 -Current state: -[ - { - "filename": "", - "sparse_blas": { - "transpose": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "components": { - "allocate": 1.0, - "components::fill_array": 1.0, - "csr::transpose": 1.0, - "free": 1.0, - "overhead": 1.0 - }, - "completed": true - } - }, - "rows": 36, - "cols": 36, - "nonzeros": 208 - } -] + Running sparse_blas: transpose diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index d05f5117b8e..d1434dad146 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -3,7 +3,8 @@ This is Ginkgo 1.7.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 -The operations are transposeRunning test case +The operations are transpose +Running test case { "size": 100, "stencil": "7pt", @@ -35,6 +36,7 @@ DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin free DEBUG: end free DEBUG: begin stencil(100,7pt) + Running sparse_blas: transpose DEBUG: begin transpose DEBUG: begin allocate DEBUG: end allocate @@ -53,25 +55,6 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end transpose -Current state: -[ - { - "size": 100, - "stencil": "7pt", - "sparse_blas": { - "transpose": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - }, - "rows": 125, - "cols": 125, - "nonzeros": 725 - } -] DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free diff --git a/benchmark/test/reference/sparse_blas.simple.stderr b/benchmark/test/reference/sparse_blas.simple.stderr index bf5001f67b7..452374a9268 100644 --- a/benchmark/test/reference/sparse_blas.simple.stderr +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -3,36 +3,12 @@ This is Ginkgo 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 -The operations are transposeRunning test case +The operations are transpose +Running test case { "size": 100, "stencil": "7pt", "sparse_blas": {} } Matrix is of size (125, 125), 725 -Current state: -[ - { - "size": 100, - "stencil": "7pt", - "sparse_blas": { - "transpose": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "components": { - "allocate": 1.0, - "components::fill_array": 1.0, - "csr::transpose": 1.0, - "free": 1.0, - "overhead": 1.0 - }, - "completed": true - } - }, - "rows": 125, - "cols": 125, - "nonzeros": 725 - } -] + Running sparse_blas: transpose diff --git a/benchmark/test/reference/spmv.matrix.stderr b/benchmark/test/reference/spmv.matrix.stderr index 8d942cd0de5..a618da5b321 100644 --- a/benchmark/test/reference/spmv.matrix.stderr +++ 
b/benchmark/test/reference/spmv.matrix.stderr @@ -10,22 +10,5 @@ Running test case "filename": "", "spmv": {} } -Matrix is of size (36, 36) -Current state: -[ - { - "filename": "", - "spmv": { - "coo": { - "storage": 3328, - "max_relative_norm2": 1.0, - "time": 1.0, - "repetitions": 10, - "completed": true - } - }, - "size": 36, - "nnz": 208, - "optimal": {} - } -] +Matrix is of size (36, 36), 208 + Running spmv: coo diff --git a/benchmark/test/reference/spmv.matrix.stdout b/benchmark/test/reference/spmv.matrix.stdout index 47035c27549..dc30ab6b284 100644 --- a/benchmark/test/reference/spmv.matrix.stdout +++ b/benchmark/test/reference/spmv.matrix.stdout @@ -11,8 +11,9 @@ "completed": true } }, - "size": 36, - "nnz": 208, + "rows": 36, + "cols": 36, + "nonzeros": 208, "optimal": { "spmv": "coo" } diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 961ac587990..09a10b725ea 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -11,7 +11,6 @@ Running test case "stencil": "7pt", "spmv": {} } -DEBUG: begin stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -52,13 +51,9 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -Matrix is of size (125, 125) -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() +Matrix is of size (125, 125), 725 +DEBUG: begin stencil(100,7pt) + Running spmv: coo DEBUG: begin coo DEBUG: begin allocate DEBUG: end allocate @@ -87,27 +82,8 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end coo -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "spmv": { - "coo": { - "storage": 11600, - "time": 1.0, - "repetitions": 1, - "completed": true - } - }, - "nnz": 725, - "optimal": {} - } -] -DEBUG: begin free -DEBUG: end free +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/spmv.profile.stdout b/benchmark/test/reference/spmv.profile.stdout index dacc490ddf0..5302d54f9f0 100644 --- a/benchmark/test/reference/spmv.profile.stdout +++ b/benchmark/test/reference/spmv.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "spmv": { "coo": { @@ -11,7 +11,9 @@ "completed": true } }, - "nnz": 725, + "rows": 125, + "cols": 125, + "nonzeros": 725, "optimal": { "spmv": "coo" } diff --git a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr index dc9933b40ec..a910512ff31 100644 --- a/benchmark/test/reference/spmv.simple.stderr +++ b/benchmark/test/reference/spmv.simple.stderr @@ -11,22 +11,5 @@ Running test case "stencil": "7pt", "spmv": {} } -Matrix is of size (125, 125) -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "spmv": { - "coo": { - "storage": 11600, - "max_relative_norm2": 1.0, - "time": 1.0, - "repetitions": 10, - "completed": true - } - }, - "nnz": 725, - "optimal": {} - } -] +Matrix is of size (125, 125), 725 + Running spmv: coo diff --git a/benchmark/test/reference/spmv.simple.stdout b/benchmark/test/reference/spmv.simple.stdout index 90f8903a452..737938d7c96 100644 --- a/benchmark/test/reference/spmv.simple.stdout +++ b/benchmark/test/reference/spmv.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "spmv": { "coo": { @@ -12,7 +12,9 @@ "completed": true } }, - "nnz": 725, + "rows": 125, + 
"cols": 125, + "nonzeros": 725, "optimal": { "spmv": "coo" } diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index b7ec0e72cf1..41acb560ba1 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -53,10 +54,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include -#include -#include #include @@ -100,10 +97,6 @@ DEFINE_string( DEFINE_bool(detailed, true, "If set, performs several runs to obtain more detailed results"); -DEFINE_bool(keep_errors, true, - "If set, writes exception messages during the execution into the " - "JSON output"); - DEFINE_bool(nested_names, false, "If set, separately logs nested operations"); DEFINE_bool(profile, false, @@ -157,27 +150,32 @@ std::unique_ptr input_stream; * @param format the format of the benchmark input data */ void initialize_argument_parsing(int* argc, char** argv[], std::string& header, - std::string& format) + std::string& format, bool do_print = true) { - std::ostringstream doc; - doc << header << "Usage: " << (*argv)[0] << " [options]\n" - << format - << " The results are written on standard output, in the same " - "format,\n" - << " but with test cases extended to include an additional member " - "\n" - << " object for each benchmark run.\n" - << " If run with a --backup flag, an intermediate result is " - "written \n" - << " to a file in the same format. The backup file can be used as " - "\n" - << " input to this test suite, and the benchmarking will \n" - << " continue from the point where the backup file was created."; - - gflags::SetUsageMessage(doc.str()); - std::ostringstream ver; - ver << gko::version_info::get(); - gflags::SetVersionString(ver.str()); + if (do_print) { + std::ostringstream doc; + doc << header << "Usage: " << (*argv)[0] << " [options]\n" + << format + << " The results are written on standard output, in the same " + "format,\n" + << " but with test cases extended to include an additional member " + "\n" + << " object for each benchmark run.\n" + << " If run with a --backup flag, an intermediate result is " + "written \n" + << " to a file in the same format. 
The backup file can be used as " + "\n" + << " input to this test suite, and the benchmarking will \n" + << " continue from the point where the backup file was created."; + + gflags::SetUsageMessage(doc.str()); + std::ostringstream ver; + ver << gko::version_info::get(); + gflags::SetVersionString(ver.str()); + } else { + gflags::SetUsageMessage(""); + gflags::SetVersionString(""); + } gflags::ParseCommandLineFlags(argc, argv, true); if (FLAGS_profile) { FLAGS_repetitions = "1"; @@ -206,20 +204,19 @@ void print_general_information(const std::string& extra) { std::clog << gko::version_info::get() << std::endl << "Running on " << FLAGS_executor << "(" << FLAGS_device_id - << ")" << std::endl + << ")\n" << "Running with " << FLAGS_warmup << " warm iterations and "; if (FLAGS_repetitions == "auto") { std::clog << "adaptively determined repetititions with " << FLAGS_min_repetitions << " <= rep <= " << FLAGS_max_repetitions - << " and a minimal runtime of " << FLAGS_min_runtime << "s" - << std::endl; + << " and a minimal runtime of " << FLAGS_min_runtime << "s\n"; } else { - std::clog << FLAGS_repetitions << " running iterations" << std::endl; + std::clog << FLAGS_repetitions << " running iterations\n"; } std::clog << "The random seed for right hand sides is " << FLAGS_seed - << std::endl - << extra; + << '\n' + << extra << '\n'; } @@ -319,7 +316,7 @@ std::istream& get_input_stream() // backup generation -void backup_results(rapidjson::Document& results) +void backup_results(json& results) { static int next = 0; static auto filenames = []() -> std::array { @@ -576,279 +573,4 @@ gko::remove_complex compute_max_relative_norm2( } -/** - * A class for controlling the number warmup and timed iterations. - * - * The behavior is determined by the following flags - * - 'repetitions' switch between fixed and adaptive number of iterations - * - 'warmup' warmup iterations, applies in fixed and adaptive case - * - 'min_repetitions' minimal number of repetitions (adaptive case) - * - 'max_repetitions' maximal number of repetitions (adaptive case) - * - 'min_runtime' minimal total runtime (adaptive case) - * - 'repetition_growth_factor' controls the increase between two successive - * timings - * - * Usage: - * `IterationControl` exposes the member functions: - * - `warmup_run()`: controls run defined by `warmup` flag - * - `run(bool)`: controls run defined by all other flags - * - `get_timer()`: access to underlying timer - * The first two methods return an object that is to be used in a range-based - * for loop: - * ``` - * IterationControl ic(get_timer(...)); - * - * // warmup run always uses fixed number of iteration and does not issue - * // timings - * for(auto status: ic.warmup_run()){ - * // execute benchmark - * } - * // run may use adaptive number of iterations (depending on cmd line flag) - * // and issues timing (unless manage_timings is false) - * for(auto status: ic.run(manage_timings [default is true])){ - * if(! manage_timings) ic.get_timer->tic(); - * // execute benchmark - * if(! manage_timings) ic.get_timer->toc(); - * } - * - * ``` - * At the beginning of both methods, the timer is reset. - * The `status` object exposes the member - * - `cur_it`, containing the current iteration number, - * and the methods - * - `is_finished`, checks if the benchmark is finished, - */ -class IterationControl { - using IndexType = unsigned int; //!< to be compatible with GFLAGS type - - class run_control; - -public: - /** - * Creates an `IterationControl` object. 
- * - * Uses the commandline flags to setup the stopping criteria for the - * warmup and timed run. - * - * @param timer the timer that is to be used for the timings - */ - explicit IterationControl(const std::shared_ptr& timer) - { - status_warmup_ = {TimerManager{timer, false}, FLAGS_warmup, - FLAGS_warmup, 0., 0}; - if (FLAGS_repetitions == "auto") { - status_run_ = {TimerManager{timer, true}, FLAGS_min_repetitions, - FLAGS_max_repetitions, FLAGS_min_runtime}; - } else { - const auto reps = - static_cast(std::stoi(FLAGS_repetitions)); - status_run_ = {TimerManager{timer, true}, reps, reps, 0., 0}; - } - } - - IterationControl() = default; - IterationControl(const IterationControl&) = default; - IterationControl(IterationControl&&) = default; - - /** - * Creates iterable `run_control` object for the warmup run. - * - * This run uses always a fixed number of iterations. - */ - run_control warmup_run() - { - status_warmup_.cur_it = 0; - status_warmup_.managed_timer.clear(); - return run_control{&status_warmup_}; - } - - /** - * Creates iterable `run_control` object for the timed run. - * - * This run may be adaptive, depending on the commandline flags. - * - * @param manage_timings If true, the timer calls (`tic/toc`) are handled - * by the `run_control` object, otherwise they need to be executed outside - */ - run_control run(bool manage_timings = true) - { - status_run_.cur_it = 0; - status_run_.managed_timer.clear(); - status_run_.managed_timer.manage_timings = manage_timings; - return run_control{&status_run_}; - } - - std::shared_ptr get_timer() const - { - return status_run_.managed_timer.timer; - } - - /** - * Compute the time from the given statistical method - * - * @param method the statistical method. If the timer does not have the - * same iteration as the IterationControl, it can only use - * average from the IterationControl. - * - * @return the statistical time - */ - double compute_time(const std::string& method = "average") const - { - if (status_run_.managed_timer.timer->get_num_repetitions() == - this->get_num_repetitions()) { - return status_run_.managed_timer.compute_time(method); - } else { - assert(method == "average"); - return status_run_.managed_timer.get_total_time() / - this->get_num_repetitions(); - } - } - - IndexType get_num_repetitions() const { return status_run_.cur_it; } - -private: - struct TimerManager { - std::shared_ptr timer; - bool manage_timings = false; - - void tic() - { - if (manage_timings) { - timer->tic(); - } - } - void toc(unsigned int num = 1) - { - if (manage_timings) { - timer->toc(num); - } - } - - void clear() { timer->clear(); } - - double get_total_time() const { return timer->get_total_time(); } - - double compute_time(const std::string& method = "average") const - { - return timer->compute_time(method); - } - }; - - /** - * Stores stopping criteria of the adaptive benchmark run as well as the - * current iteration number. 
- */ - struct status { - TimerManager managed_timer{}; - - IndexType min_it = 0; - IndexType max_it = 0; - double max_runtime = 0.; - - IndexType cur_it = 0; - - /** - * checks if the adaptive run is complete - * - * the adaptive run is complete if: - * - the minimum number of iteration is reached - * - and either: - * - the maximum number of repetitions is reached - * - the total runtime is above the threshold - * - * @return completeness state of the adaptive run - */ - bool is_finished() const - { - return cur_it >= min_it && - (cur_it >= max_it || - managed_timer.get_total_time() >= max_runtime); - } - }; - - /** - * Iterable class managing the benchmark iteration. - * - * Has to be used in a range-based for loop. - */ - struct run_control { - struct iterator { - /** - * Increases the current iteration count and finishes timing if - * necessary. - * - * As `++it` is the last step of a for-loop, the managed_timer is - * stopped, if enough iterations have passed since the last timing. - * The interval between two timings is steadily increased to - * reduce the timing overhead. - */ - iterator operator++() - { - cur_info->cur_it++; - if (cur_info->cur_it >= next_timing && !stopped) { - cur_info->managed_timer.toc( - static_cast(cur_info->cur_it - start_timing)); - stopped = true; - next_timing = static_cast(std::ceil( - next_timing * FLAGS_repetition_growth_factor)); - // If repetition_growth_factor <= 1, next_timing will be - // next iteration. - if (next_timing <= cur_info->cur_it) { - next_timing = cur_info->cur_it + 1; - } - } - return *this; - } - - status operator*() const { return *cur_info; } - - /** - * Checks if the benchmark is finished and handles timing, if - * necessary. - * - * As `begin != end` is the first step in a for-loop, the - * managed_timer is started, if it was previously stopped. - * Additionally, if the benchmark is complete and the managed_timer - * is still running it is stopped. (This may occur if the maximal - * number of repetitions is surpassed) - * - * Uses only the information from the `status` object, i.e. - * the right hand side is ignored. 
- * - * @return true if benchmark is not finished, else false - */ - bool operator!=(const iterator&) - { - const bool is_finished = cur_info->is_finished(); - if (!is_finished && stopped) { - stopped = false; - cur_info->managed_timer.tic(); - start_timing = cur_info->cur_it; - } else if (is_finished && !stopped) { - cur_info->managed_timer.toc( - static_cast(cur_info->cur_it - start_timing)); - stopped = true; - } - return !is_finished; - } - - status* cur_info; - IndexType next_timing = 1; //!< next iteration to stop timing - IndexType start_timing = 0; //!< iteration for starting timing - bool stopped = true; - }; - - iterator begin() const { return iterator{info}; } - - // not used, could potentially be used in c++17 as a sentinel - iterator end() const { return iterator{}; } - - status* info; - }; - - status status_warmup_; - status status_run_; -}; - - #endif // GKO_BENCHMARK_UTILS_GENERAL_HPP_ diff --git a/benchmark/utils/general_matrix.hpp b/benchmark/utils/general_matrix.hpp index 2049dadf45f..39d8b5a8107 100644 --- a/benchmark/utils/general_matrix.hpp +++ b/benchmark/utils/general_matrix.hpp @@ -57,9 +57,9 @@ DEFINE_string(input_matrix, "", */ void initialize_argument_parsing_matrix( int* argc, char** argv[], std::string& header, std::string& format, - std::string additional_matrix_file_json = "") + std::string additional_matrix_file_json = "", bool do_print = true) { - initialize_argument_parsing(argc, argv, header, format); + initialize_argument_parsing(argc, argv, header, format, do_print); std::string input_matrix_str{FLAGS_input_matrix}; if (!input_matrix_str.empty()) { if (input_stream) { @@ -67,17 +67,13 @@ void initialize_argument_parsing_matrix( << "-input and -input_matrix cannot be used simultaneously\n"; std::exit(1); } - // create JSON for the filename via RapidJSON to ensure the string is - // correctly escaped - rapidjson::Document d; + // create JSON for the filename via nlohmann_json to ensure the string + // is correctly escaped auto json_template = R"([{"filename":"")" + additional_matrix_file_json + "}]"; - d.Parse(json_template.c_str()); - d[0]["filename"].SetString(input_matrix_str.c_str(), d.GetAllocator()); - rapidjson::StringBuffer sb; - rapidjson::PrettyWriter writer(sb); - d.Accept(writer); - input_stream = std::make_unique(sb.GetString()); + auto doc = json::parse(json_template); + doc[0]["filename"] = input_matrix_str; + input_stream = std::make_unique(doc.dump()); } } diff --git a/benchmark/utils/generator.hpp b/benchmark/utils/generator.hpp index 076d2954980..257a2384634 100644 --- a/benchmark/utils/generator.hpp +++ b/benchmark/utils/generator.hpp @@ -53,28 +53,45 @@ struct DefaultSystemGenerator { using Vec = vec; static gko::matrix_data generate_matrix_data( - rapidjson::Value& config) + const json& config) { - if (config.HasMember("filename")) { - std::ifstream in(config["filename"].GetString()); + if (config.contains("filename")) { + std::ifstream in(config["filename"].get()); return gko::read_generic_raw(in); - } else if (config.HasMember("stencil")) { + } else if (config.contains("stencil")) { return generate_stencil( - config["stencil"].GetString(), config["size"].GetInt64()); + config["stencil"].get(), + config["size"].get()); } else { throw std::runtime_error( "No known way to generate matrix data found."); } } - static std::string describe_config(rapidjson::Value& config) + static std::string get_example_config() { - if (config.HasMember("filename")) { - return config["filename"].GetString(); - } else if (config.HasMember("stencil")) { + 
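As an aside, the escaping guarantee that the new nlohmann_json code in general_matrix.hpp relies on can be checked with a small standalone sketch; the path below is invented for the example and is not part of the benchmark suite:

#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::ordered_json;

int main()
{
    // Hypothetical -input_matrix value containing characters that must be escaped.
    std::string input_matrix_str = R"(matrices/my "7pt" stencil\test.mtx)";
    // Same pattern as initialize_argument_parsing_matrix: parse a template,
    // then assign the string through the json object so quoting is handled.
    auto doc = json::parse(R"([{"filename": ""}])");
    doc[0]["filename"] = input_matrix_str;
    std::cout << doc.dump(4) << '\n';  // valid JSON, quotes and backslash escaped
}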
return json:: + parse(R"([{"filename": "my_file.mtx"},{"filename": "my_file2.mtx"},{"size": 100, "stencil": "7pt"}])") + .dump(4); + } + + static bool validate_config(const json& test_case) + { + return ((test_case.contains("size") && test_case.contains("stencil") && + test_case["size"].is_number_integer() && + test_case["stencil"].is_string()) || + (test_case.contains("filename") && + test_case["filename"].is_string())); + } + + static std::string describe_config(const json& config) + { + if (config.contains("filename")) { + return config["filename"].get(); + } else if (config.contains("stencil")) { std::stringstream ss; - ss << "stencil(" << config["size"].GetInt64() << "," - << config["stencil"].GetString() << ")"; + ss << "stencil(" << config["size"].get() << "," + << config["stencil"].get() << ")"; return ss.str(); } else { throw std::runtime_error("No known way to describe config."); @@ -82,30 +99,30 @@ struct DefaultSystemGenerator { } static std::shared_ptr generate_matrix_with_optimal_format( - std::shared_ptr exec, rapidjson::Value& config) + std::shared_ptr exec, json& config) { auto data = generate_matrix_data(config); return generate_matrix_with_format( - std::move(exec), config["optimal"]["spmv"].GetString(), data); + std::move(exec), config["optimal"]["spmv"].get(), + data); } static std::shared_ptr generate_matrix_with_format( std::shared_ptr exec, const std::string& format_name, const gko::matrix_data& data, - rapidjson::Value* spmv_case = nullptr, - rapidjson::MemoryPoolAllocator<>* allocator = nullptr) + json* spmv_case = nullptr) { auto storage_logger = std::make_shared(); - if (spmv_case && allocator) { + if (spmv_case) { exec->add_logger(storage_logger); } auto mtx = gko::share(::formats::matrix_factory(format_name, exec, data)); - if (spmv_case && allocator) { + if (spmv_case) { exec->remove_logger(storage_logger); - storage_logger->write_data(*spmv_case, *allocator); + storage_logger->write_data(*spmv_case); } return mtx; @@ -172,32 +189,51 @@ struct DistributedDefaultSystemGenerator { using Vec = dist_vec; gko::matrix_data generate_matrix_data( - rapidjson::Value& config) const + const json& config) const { - if (config.HasMember("filename")) { - std::ifstream in(config["filename"].GetString()); + if (config.contains("filename")) { + std::ifstream in(config["filename"].get()); return gko::read_generic_raw(in); - } else if (config.HasMember("stencil")) { + } else if (config.contains("stencil")) { auto local_size = static_cast( - config["size"].GetInt64() / comm.size()); + config["size"].get() / comm.size()); return generate_stencil( - config["stencil"].GetString(), comm, local_size, - config["comm_pattern"].GetString() == std::string("optimal")); + config["stencil"].get(), comm, local_size, + config["comm_pattern"].get() == + std::string("optimal")); } else { throw std::runtime_error( "No known way to generate matrix data found."); } } - std::string describe_config(rapidjson::Value& config) const + static std::string get_example_config() { - if (config.HasMember("filename")) { - return config["filename"].GetString(); - } else if (config.HasMember("stencil")) { + return json:: + parse(R"([{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}, {"filename": "my_file.mtx"}])") + .dump(4); + } + + static bool validate_config(const json& test_case) + { + return ((test_case.contains("size") && test_case.contains("stencil") && + test_case.contains("comm_pattern") && + test_case["size"].is_number_integer() && + test_case["stencil"].is_string() && + 
test_case["comm_pattern"].is_string()) || + (test_case.contains("filename") && + test_case["filename"].is_string())); + } + + static std::string describe_config(const json& config) + { + if (config.contains("filename")) { + return config["filename"].get(); + } else if (config.contains("stencil")) { std::stringstream ss; - ss << "stencil(" << config["size"].GetInt64() << "," - << config["stencil"].GetString() << "," - << config["comm_pattern"].GetString() << ")"; + ss << "stencil(" << config["size"].get() << "," + << config["stencil"].get() << "," + << config["comm_pattern"].get() << ")"; return ss.str(); } else { throw std::runtime_error("No known way to describe config."); @@ -205,29 +241,33 @@ struct DistributedDefaultSystemGenerator { } std::shared_ptr generate_matrix_with_optimal_format( - std::shared_ptr exec, rapidjson::Value& config) const + std::shared_ptr exec, json& config) const { auto data = generate_matrix_data(config); return generate_matrix_with_format( - std::move(exec), config["optimal"]["spmv"].GetString(), data); + std::move(exec), config["optimal"]["spmv"].get(), + data); } std::shared_ptr generate_matrix_with_format( std::shared_ptr exec, const std::string& format_name, const gko::matrix_data& data, - rapidjson::Value* spmv_case = nullptr, - rapidjson::MemoryPoolAllocator<>* allocator = nullptr) const + json* spmv_case = nullptr) const { auto part = gko::experimental::distributed:: Partition::build_from_global_size_uniform( exec, comm.size(), static_cast(data.size[0])); auto formats = split(format_name, '-'); + if (formats.size() != 2) { + throw std::runtime_error{"Invalid distributed format specifier " + + format_name}; + } auto local_mat = formats::matrix_type_factory.at(formats[0])(exec); auto non_local_mat = formats::matrix_type_factory.at(formats[1])(exec); auto storage_logger = std::make_shared(); - if (spmv_case && allocator) { + if (spmv_case) { exec->add_logger(storage_logger); } @@ -235,9 +275,9 @@ struct DistributedDefaultSystemGenerator { exec, comm, local_mat, non_local_mat); dist_mat->read_distributed(data, part); - if (spmv_case && allocator) { + if (spmv_case) { exec->remove_logger(storage_logger); - storage_logger->write_data(comm, *spmv_case, *allocator); + storage_logger->write_data(comm, *spmv_case); } return dist_mat; diff --git a/benchmark/utils/iteration_control.hpp b/benchmark/utils/iteration_control.hpp new file mode 100644 index 00000000000..295ae7870d6 --- /dev/null +++ b/benchmark/utils/iteration_control.hpp @@ -0,0 +1,326 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_ITERATION_CONTROL_HPP_ +#define GKO_BENCHMARK_UTILS_ITERATION_CONTROL_HPP_ + + +#include + + +#include +#include +#include + + +#include "benchmark/utils/general.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" +#include "core/distributed/helpers.hpp" + + +/** + * A class for controlling the number warmup and timed iterations. + * + * The behavior is determined by the following flags + * - 'repetitions' switch between fixed and adaptive number of iterations + * - 'warmup' warmup iterations, applies in fixed and adaptive case + * - 'min_repetitions' minimal number of repetitions (adaptive case) + * - 'max_repetitions' maximal number of repetitions (adaptive case) + * - 'min_runtime' minimal total runtime (adaptive case) + * - 'repetition_growth_factor' controls the increase between two successive + * timings + * + * Usage: + * `IterationControl` exposes the member functions: + * - `warmup_run()`: controls run defined by `warmup` flag + * - `run(bool)`: controls run defined by all other flags + * - `get_timer()`: access to underlying timer + * The first two methods return an object that is to be used in a range-based + * for loop: + * ``` + * IterationControl ic(get_timer(...)); + * + * // warmup run always uses fixed number of iteration and does not issue + * // timings + * for(auto status: ic.warmup_run()){ + * // execute benchmark + * } + * // run may use adaptive number of iterations (depending on cmd line flag) + * // and issues timing (unless manage_timings is false) + * for(auto status: ic.run(manage_timings [default is true])){ + * if(! manage_timings) ic.get_timer->tic(); + * // execute benchmark + * if(! manage_timings) ic.get_timer->toc(); + * } + * + * ``` + * At the beginning of both methods, the timer is reset. + * The `status` object exposes the member + * - `cur_it`, containing the current iteration number, + * and the methods + * - `is_finished`, checks if the benchmark is finished, + */ +class IterationControl { + using IndexType = unsigned int; //!< to be compatible with GFLAGS type + + class run_control; + +public: + /** + * Creates an `IterationControl` object. + * + * Uses the commandline flags to setup the stopping criteria for the + * warmup and timed run. 
+ * + * @param timer the timer that is to be used for the timings + */ + explicit IterationControl(const std::shared_ptr& timer) + { + status_warmup_ = {TimerManager{timer, false}, FLAGS_warmup, + FLAGS_warmup, 0., 0}; + if (FLAGS_repetitions == "auto") { + status_run_ = {TimerManager{timer, true}, FLAGS_min_repetitions, + FLAGS_max_repetitions, FLAGS_min_runtime}; + } else { + const auto reps = + static_cast(std::stoi(FLAGS_repetitions)); + status_run_ = {TimerManager{timer, true}, reps, reps, 0., 0}; + } + } + + IterationControl() = default; + IterationControl(const IterationControl&) = default; + IterationControl(IterationControl&&) = default; + + /** + * Creates iterable `run_control` object for the warmup run. + * + * This run uses always a fixed number of iterations. + */ + run_control warmup_run() + { + status_warmup_.cur_it = 0; + status_warmup_.managed_timer.clear(); + return run_control{&status_warmup_}; + } + + /** + * Creates iterable `run_control` object for the timed run. + * + * This run may be adaptive, depending on the commandline flags. + * + * @param manage_timings If true, the timer calls (`tic/toc`) are handled + * by the `run_control` object, otherwise they need to be executed outside + */ + run_control run(bool manage_timings = true) + { + status_run_.cur_it = 0; + status_run_.managed_timer.clear(); + status_run_.managed_timer.manage_timings = manage_timings; + return run_control{&status_run_}; + } + + std::shared_ptr get_timer() const + { + return status_run_.managed_timer.timer; + } + + /** + * Compute the time from the given statistical method + * + * @param method the statistical method. If the timer does not have the + * same iteration as the IterationControl, it can only use + * average from the IterationControl. + * + * @return the statistical time + */ + double compute_time(const std::string& method = "average") const + { + if (status_run_.managed_timer.timer->get_num_repetitions() == + this->get_num_repetitions()) { + return status_run_.managed_timer.compute_time(method); + } else { + assert(method == "average"); + return status_run_.managed_timer.get_total_time() / + this->get_num_repetitions(); + } + } + + IndexType get_num_repetitions() const { return status_run_.cur_it; } + +private: + struct TimerManager { + std::shared_ptr timer; + bool manage_timings = false; + + void tic() + { + if (manage_timings) { + timer->tic(); + } + } + void toc(unsigned int num = 1) + { + if (manage_timings) { + timer->toc(num); + } + } + + void clear() { timer->clear(); } + + double get_total_time() const { return timer->get_total_time(); } + + double compute_time(const std::string& method = "average") const + { + return timer->compute_time(method); + } + }; + + /** + * Stores stopping criteria of the adaptive benchmark run as well as the + * current iteration number. + */ + struct status { + TimerManager managed_timer{}; + + IndexType min_it = 0; + IndexType max_it = 0; + double max_runtime = 0.; + + IndexType cur_it = 0; + + /** + * checks if the adaptive run is complete + * + * the adaptive run is complete if: + * - the minimum number of iteration is reached + * - and either: + * - the maximum number of repetitions is reached + * - the total runtime is above the threshold + * + * @return completeness state of the adaptive run + */ + bool is_finished() const + { + return cur_it >= min_it && + (cur_it >= max_it || + managed_timer.get_total_time() >= max_runtime); + } + }; + + /** + * Iterable class managing the benchmark iteration. 
+ * + * Has to be used in a range-based for loop. + */ + struct run_control { + struct iterator { + /** + * Increases the current iteration count and finishes timing if + * necessary. + * + * As `++it` is the last step of a for-loop, the managed_timer is + * stopped, if enough iterations have passed since the last timing. + * The interval between two timings is steadily increased to + * reduce the timing overhead. + */ + iterator operator++() + { + cur_info->cur_it++; + if (cur_info->cur_it >= next_timing && !stopped) { + cur_info->managed_timer.toc( + static_cast(cur_info->cur_it - start_timing)); + stopped = true; + next_timing = static_cast(std::ceil( + next_timing * FLAGS_repetition_growth_factor)); + // If repetition_growth_factor <= 1, next_timing will be + // next iteration. + if (next_timing <= cur_info->cur_it) { + next_timing = cur_info->cur_it + 1; + } + } + return *this; + } + + status operator*() const { return *cur_info; } + + /** + * Checks if the benchmark is finished and handles timing, if + * necessary. + * + * As `begin != end` is the first step in a for-loop, the + * managed_timer is started, if it was previously stopped. + * Additionally, if the benchmark is complete and the managed_timer + * is still running it is stopped. (This may occur if the maximal + * number of repetitions is surpassed) + * + * Uses only the information from the `status` object, i.e. + * the right hand side is ignored. + * + * @return true if benchmark is not finished, else false + */ + bool operator!=(const iterator&) + { + const bool is_finished = cur_info->is_finished(); + if (!is_finished && stopped) { + stopped = false; + cur_info->managed_timer.tic(); + start_timing = cur_info->cur_it; + } else if (is_finished && !stopped) { + cur_info->managed_timer.toc( + static_cast(cur_info->cur_it - start_timing)); + stopped = true; + } + return !is_finished; + } + + status* cur_info; + IndexType next_timing = 1; //!< next iteration to stop timing + IndexType start_timing = 0; //!< iteration for starting timing + bool stopped = true; + }; + + iterator begin() const { return iterator{info}; } + + // not used, could potentially be used in c++17 as a sentinel + iterator end() const { return iterator{}; } + + status* info; + }; + + status status_warmup_; + status status_run_; +}; + + +#endif // GKO_BENCHMARK_UTILS_ITERATION_CONTROL_HPP_ diff --git a/benchmark/utils/json.hpp b/benchmark/utils/json.hpp index b0cd384cae5..684db0229aa 100644 --- a/benchmark/utils/json.hpp +++ b/benchmark/utils/json.hpp @@ -34,69 +34,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_BENCHMARK_UTILS_JSON_HPP_ -#include +#include -#include - - -#include -#include -#include -#include - - -// helper for setting rapidjson object members -template -std::enable_if_t< - !std::is_same::type, gko::size_type>::value, void> -add_or_set_member(rapidjson::Value& object, NameType&& name, T&& value, - Allocator&& allocator) -{ - if (object.HasMember(name)) { - object[name] = std::forward(value); - } else { - auto n = rapidjson::Value(name, allocator); - object.AddMember(n, std::forward(value), allocator); - } -} - - -/** - @internal This is required to fix some MacOS problems (and possibly other - compilers). There is no explicit RapidJSON constructor for `std::size_t` so a - conversion to a known constructor is required to solve any ambiguity. See the - last comments of https://github.com/ginkgo-project/ginkgo/issues/270. 
- */ -template -std::enable_if_t< - std::is_same::type, gko::size_type>::value, void> -add_or_set_member(rapidjson::Value& object, NameType&& name, T&& value, - Allocator&& allocator) -{ - if (object.HasMember(name)) { - object[name] = - std::forward(static_cast(value)); - } else { - auto n = rapidjson::Value(name, allocator); - object.AddMember( - n, std::forward(static_cast(value)), - allocator); - } -} - - -// helper for writing out rapidjson Values -inline std::ostream& operator<<(std::ostream& os, const rapidjson::Value& value) -{ - rapidjson::OStreamWrapper jos(os); - rapidjson::PrettyWriter, - rapidjson::UTF8<>, rapidjson::CrtAllocator, - rapidjson::kWriteNanAndInfFlag> - writer(jos); - value.Accept(writer); - return os; -} +using json = nlohmann::ordered_json; #endif // GKO_BENCHMARK_UTILS_JSON_HPP_ diff --git a/benchmark/utils/loggers.hpp b/benchmark/utils/loggers.hpp index e3e6228604e..1e651811f0f 100644 --- a/benchmark/utils/loggers.hpp +++ b/benchmark/utils/loggers.hpp @@ -50,10 +50,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. struct JsonSummaryWriter : gko::log::ProfilerHook::SummaryWriter, gko::log::ProfilerHook::NestedSummaryWriter { - JsonSummaryWriter(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& alloc, - gko::uint32 repetitions) - : object{&object}, alloc{&alloc}, repetitions{repetitions} + JsonSummaryWriter(json& object, gko::uint32 repetitions) + : object{&object}, repetitions{repetitions} {} void write( @@ -62,13 +60,11 @@ struct JsonSummaryWriter : gko::log::ProfilerHook::SummaryWriter, { for (const auto& entry : entries) { if (entry.name != "total") { - add_or_set_member(*object, entry.name.c_str(), - entry.exclusive.count() * 1e-9 / repetitions, - *alloc); + (*object)[entry.name] = + entry.exclusive.count() * 1e-9 / repetitions; } } - add_or_set_member(*object, "overhead", - overhead.count() * 1e-9 / repetitions, *alloc); + (*object)["overhead"] = overhead.count() * 1e-9 / repetitions; } void write_nested(const gko::log::ProfilerHook::nested_summary_entry& root, @@ -84,27 +80,24 @@ struct JsonSummaryWriter : gko::log::ProfilerHook::SummaryWriter, visit(visit, child, new_prefix); exclusive -= child.elapsed; } - add_or_set_member(*object, (prefix + node.name).c_str(), - exclusive.count() * 1e-9 / repetitions, *alloc); + (*object)[prefix + node.name] = + exclusive.count() * 1e-9 / repetitions; }; // we don't need to annotate the total for (const auto& child : root.children) { visit(visit, child, ""); } - add_or_set_member(*object, "overhead", - overhead.count() * 1e-9 / repetitions, *alloc); + (*object)["overhead"] = overhead.count() * 1e-9 / repetitions; } - rapidjson::Value* object; - rapidjson::MemoryPoolAllocator<>* alloc; + json* object; gko::uint32 repetitions; }; inline std::shared_ptr create_operations_logger( bool gpu_timer, bool nested, std::shared_ptr exec, - rapidjson::Value& object, rapidjson::MemoryPoolAllocator<>& alloc, - gko::uint32 repetitions) + json& object, gko::uint32 repetitions) { std::shared_ptr timer; if (gpu_timer) { @@ -114,12 +107,10 @@ inline std::shared_ptr create_operations_logger( } if (nested) { return gko::log::ProfilerHook::create_nested_summary( - timer, - std::make_unique(object, alloc, repetitions)); + timer, std::make_unique(object, repetitions)); } else { return gko::log::ProfilerHook::create_summary( - timer, - std::make_unique(object, alloc, repetitions)); + timer, std::make_unique(object, repetitions)); } } @@ -140,21 +131,18 @@ struct StorageLogger : gko::log::Logger { 
storage[location] = 0; } - void write_data(rapidjson::Value& output, - rapidjson::MemoryPoolAllocator<>& allocator) + void write_data(json& output) { const std::lock_guard lock(mutex); gko::size_type total{}; for (const auto& e : storage) { total += e.second; } - add_or_set_member(output, "storage", total, allocator); + output["storage"] = total; } #if GINKGO_BUILD_MPI - void write_data(gko::experimental::mpi::communicator comm, - rapidjson::Value& output, - rapidjson::MemoryPoolAllocator<>& allocator) + void write_data(gko::experimental::mpi::communicator comm, json& output) { const std::lock_guard lock(mutex); gko::size_type total{}; @@ -166,7 +154,7 @@ struct StorageLogger : gko::log::Logger { ? static_cast(MPI_IN_PLACE) : &total, &total, 1, MPI_SUM, 0); - add_or_set_member(output, "storage", total, allocator); + output["storage"] = total; } #endif @@ -191,17 +179,16 @@ struct ResidualLogger : gko::log::Logger { const gko::array* status, bool all_stopped) const override { - timestamps.PushBack(std::chrono::duration( - std::chrono::steady_clock::now() - start) - .count(), - alloc); + timestamps.push_back(std::chrono::duration( + std::chrono::steady_clock::now() - start) + .count()); if (residual_norm) { - rec_res_norms.PushBack( - get_norm(gko::as>(residual_norm)), alloc); + rec_res_norms.push_back( + get_norm(gko::as>(residual_norm))); } else { gko::detail::vector_dispatch( residual, [&](const auto v_residual) { - rec_res_norms.PushBack(compute_norm2(v_residual), alloc); + rec_res_norms.push_back(compute_norm2(v_residual)); }); } if (solution) { @@ -209,32 +196,25 @@ struct ResidualLogger : gko::log::Logger { rc_vtype>(solution, [&](auto v_solution) { using concrete_type = std::remove_pointer_t>; - true_res_norms.PushBack( - compute_residual_norm(matrix, gko::as(b), - v_solution), - alloc); + true_res_norms.push_back(compute_residual_norm( + matrix, gko::as(b), v_solution)); }); } else { - true_res_norms.PushBack(-1.0, alloc); + true_res_norms.push_back(-1.0); } if (implicit_sq_residual_norm) { - implicit_res_norms.PushBack( - std::sqrt(get_norm( - gko::as>(implicit_sq_residual_norm))), - alloc); + implicit_res_norms.push_back(std::sqrt( + get_norm(gko::as>(implicit_sq_residual_norm)))); has_implicit_res_norm = true; } else { - implicit_res_norms.PushBack(-1.0, alloc); + implicit_res_norms.push_back(-1.0); } } ResidualLogger(gko::ptr_param matrix, - gko::ptr_param b, - rapidjson::Value& rec_res_norms, - rapidjson::Value& true_res_norms, - rapidjson::Value& implicit_res_norms, - rapidjson::Value& timestamps, - rapidjson::MemoryPoolAllocator<>& alloc) + gko::ptr_param b, json& rec_res_norms, + json& true_res_norms, json& implicit_res_norms, + json& timestamps) : gko::log::Logger(gko::log::Logger::iteration_complete_mask), matrix{matrix.get()}, b{b.get()}, @@ -243,8 +223,7 @@ struct ResidualLogger : gko::log::Logger { true_res_norms{true_res_norms}, has_implicit_res_norm{}, implicit_res_norms{implicit_res_norms}, - timestamps{timestamps}, - alloc{alloc} + timestamps{timestamps} {} bool has_implicit_res_norms() const { return has_implicit_res_norm; } @@ -253,12 +232,11 @@ struct ResidualLogger : gko::log::Logger { const gko::LinOp* matrix; const gko::LinOp* b; std::chrono::steady_clock::time_point start; - rapidjson::Value& rec_res_norms; - rapidjson::Value& true_res_norms; + json& rec_res_norms; + json& true_res_norms; mutable bool has_implicit_res_norm; - rapidjson::Value& implicit_res_norms; - rapidjson::Value& timestamps; - rapidjson::MemoryPoolAllocator<>& alloc; + json& 
implicit_res_norms; + json& timestamps; }; @@ -279,11 +257,7 @@ struct IterationLogger : gko::log::Logger { : gko::log::Logger(gko::log::Logger::iteration_complete_mask) {} - void write_data(rapidjson::Value& output, - rapidjson::MemoryPoolAllocator<>& allocator) - { - add_or_set_member(output, "iterations", this->num_iters, allocator); - } + void write_data(json& output) { output["iterations"] = this->num_iters; } private: mutable gko::size_type num_iters{0}; diff --git a/benchmark/utils/runner.hpp b/benchmark/utils/runner.hpp new file mode 100644 index 00000000000..3520f7299ee --- /dev/null +++ b/benchmark/utils/runner.hpp @@ -0,0 +1,209 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_RUNNER_HPP_ +#define GKO_BENCHMARK_UTILS_RUNNER_HPP_ + + +#include + + +#include +#include +#include + + +#include "benchmark/utils/general.hpp" + + +std::shared_ptr create_profiler_hook( + std::shared_ptr exec, bool do_print = true) +{ + using gko::log::ProfilerHook; + std::map()>> + hook_map{ + {"none", [] { return std::shared_ptr{}; }}, + {"auto", [&] { return ProfilerHook::create_for_executor(exec); }}, + {"nvtx", [] { return ProfilerHook::create_nvtx(); }}, + {"roctx", [] { return ProfilerHook::create_roctx(); }}, + {"tau", [] { return ProfilerHook::create_tau(); }}, + {"vtune", [] { return ProfilerHook::create_vtune(); }}, + {"debug", [do_print] { + return ProfilerHook::create_custom( + [do_print](const char* name, + gko::log::profile_event_category) { + if (do_print) { + std::clog << "DEBUG: begin " << name << '\n'; + } + }, + [do_print](const char* name, + gko::log::profile_event_category) { + if (do_print) { + std::clog << "DEBUG: end " << name << '\n'; + } + }); + }}}; + return hook_map.at(FLAGS_profiler_hook)(); +} + + +template +struct Benchmark { + /** The name to be used in the JSON output. 
*/ + virtual const std::string& get_name() const = 0; + + /** The operations to loop over for each test case. */ + virtual const std::vector& get_operations() const = 0; + + /** Should we write logging output? */ + virtual bool should_print() const = 0; + + /** Example JSON input */ + virtual std::string get_example_config() const = 0; + + /** Is the input test case in the correct format? */ + virtual bool validate_config(const json& value) const = 0; + + /** Textual representation of the test case for profiler annotation */ + virtual std::string describe_config(const json& test_case) const = 0; + + /** Sets up shared state and test case info */ + virtual State setup(std::shared_ptr exec, + json& test_case) const = 0; + + /** Runs a single operation of the benchmark */ + virtual void run(std::shared_ptr exec, + std::shared_ptr timer, State& state, + const std::string& operation, + json& operation_case) const = 0; + + /** Post-process test case info. */ + virtual void postprocess(json& test_case) const {} +}; + + +template +void run_test_cases(const Benchmark& benchmark, + std::shared_ptr exec, + std::shared_ptr timer, json& test_cases) +{ + if (!test_cases.is_array()) { + if (benchmark.should_print()) { + std::cerr + << "Input has to be a JSON array of benchmark configurations:\n" + << benchmark.get_example_config() << std::endl; + } + std::exit(1); + } + for (const auto& test_case : test_cases) { + if (!test_case.is_object() || !benchmark.validate_config(test_case)) { + if (benchmark.should_print()) { + std::cerr << "Invalid test case:\n" + << std::setw(4) << test_case << "\nInput format:\n" + << benchmark.get_example_config() << std::endl; + } + std::exit(2); + } + } + + auto profiler_hook = create_profiler_hook(exec, benchmark.should_print()); + if (profiler_hook) { + exec->add_logger(profiler_hook); + } + auto annotate = + [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + }; + + for (auto& test_case : test_cases) { + try { + // set up benchmark + if (!test_case.contains(benchmark.get_name())) { + test_case[benchmark.get_name()] = json::object(); + } + if (benchmark.should_print()) { + std::clog << "Running test case\n" + << std::setw(4) << test_case << std::endl; + } + auto test_case_state = benchmark.setup(exec, test_case); + auto test_case_str = benchmark.describe_config(test_case); + auto test_case_range = annotate(test_case_str.c_str()); + auto& benchmark_case = test_case[benchmark.get_name()]; + for (const auto& operation_name : benchmark.get_operations()) { + if (benchmark_case.contains(operation_name) && + !FLAGS_overwrite) { + continue; + } + benchmark_case[operation_name] = json::object(); + if (benchmark.should_print()) { + std::clog << "\tRunning " << benchmark.get_name() << ": " + << operation_name << std::endl; + } + auto& operation_case = benchmark_case[operation_name]; + try { + auto operation_range = annotate(operation_name.c_str()); + benchmark.run(exec, timer, test_case_state, operation_name, + operation_case); + operation_case["completed"] = true; + } catch (const std::exception& e) { + operation_case["completed"] = false; + operation_case["error_type"] = + gko::name_demangling::get_dynamic_type(e); + operation_case["error"] = e.what(); + std::cerr << "Error when processing test case\n" + << std::setw(4) << test_case << "\n" + << "what(): " << e.what() << std::endl; + } + + if (benchmark.should_print()) { + backup_results(test_cases); + } + } + 
benchmark.postprocess(test_case); + } catch (const std::exception& e) { + std::cerr << "Error setting up benchmark, what(): " << e.what() + << std::endl; + test_case["error_type"] = gko::name_demangling::get_dynamic_type(e); + test_case["error"] = e.what(); + } + } + + if (profiler_hook) { + exec->remove_logger(profiler_hook); + } +} + + +#endif // GKO_BENCHMARK_UTILS_RUNNER_HPP_ diff --git a/benchmark/utils/spmv_validation.hpp b/benchmark/utils/spmv_validation.hpp deleted file mode 100644 index 83ea2085ec2..00000000000 --- a/benchmark/utils/spmv_validation.hpp +++ /dev/null @@ -1,83 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#ifndef GKO_BENCHMARK_UTILS_SPMV_VALIDATION_HPP_ -#define GKO_BENCHMARK_UTILS_SPMV_VALIDATION_HPP_ - - -#include - - -#include -#include - - -#include - - -std::string example_config = R"( - [ - {"filename": "my_file.mtx"}, - {"filename": "my_file2.mtx"}, - {"size": 100, "stencil": "7pt"}, - ] -)"; - - -/** - * Function which outputs the input format for benchmarks similar to the spmv. - */ -[[noreturn]] void print_config_error_and_exit() -{ - std::cerr << "Input has to be a JSON array of matrix configurations:\n" - << example_config << std::endl; - std::exit(1); -} - - -/** - * Validates whether the input format is correct for spmv-like benchmarks. - * - * @param value the JSON value to test. 
- */ -void validate_option_object(const rapidjson::Value& value) -{ - if (!value.IsObject() || - !((value.HasMember("size") && value.HasMember("stencil") && - value["size"].IsInt64() && value["stencil"].IsString()) || - (value.HasMember("filename") && value["filename"].IsString()))) { - print_config_error_and_exit(); - } -} - - -#endif // GKO_BENCHMARK_UTILS_SPMV_VALIDATION_HPP_ diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index a54d4d506ee..828f95bc8ca 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -14,8 +14,8 @@ if(GINKGO_BUILD_BENCHMARKS) if (NOT gflags_FOUND) add_subdirectory(gflags) endif() - if (NOT RapidJSON_FOUND) - add_subdirectory(rapidjson) + if (NOT nlohmann_json_FOUND) + add_subdirectory(nlohmann_json) endif() endif() diff --git a/third_party/nlohmann_json/CMakeLists.txt b/third_party/nlohmann_json/CMakeLists.txt new file mode 100644 index 00000000000..77064c66c40 --- /dev/null +++ b/third_party/nlohmann_json/CMakeLists.txt @@ -0,0 +1,9 @@ +message(STATUS "Fetching external nlohmann_json") +include(FetchContent) +FetchContent_Declare( + nlohmann_json + GIT_REPOSITORY https://github.com/nlohmann/json.git + GIT_TAG bc889afb4c5bf1c0d8ee29ef35eaaf4c8bef8a5d +) +set(JSON_BuildTests OFF CACHE INTERNAL "") +FetchContent_MakeAvailable(nlohmann_json) diff --git a/third_party/rapidjson/CMakeLists.txt b/third_party/rapidjson/CMakeLists.txt deleted file mode 100644 index a96b90cb882..00000000000 --- a/third_party/rapidjson/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -message(STATUS "Fetching external RapidJSON") -include(FetchContent) -FetchContent_Declare( - rapidjson - GIT_REPOSITORY https://github.com/Tencent/rapidjson.git - GIT_TAG 27c3a8dc0e2c9218fe94986d249a12b5ed838f1d -) -FetchContent_GetProperties(rapidjson) -if(NOT rapidjson_POPULATED) - FetchContent_Populate(rapidjson) -endif() -set(RapidJSON_INCLUDE_DIR "${rapidjson_SOURCE_DIR}/include") -add_library(rapidjson INTERFACE) -set_target_properties(rapidjson PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${RapidJSON_INCLUDE_DIR}") From d25a7573a6fd071e74a6f4e81028615c59cb2ab1 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 27 Jul 2023 23:45:43 +0200 Subject: [PATCH 245/583] add distributed tests again This reverts commit 0dab7626e920bfdf32a2285ff5741da1e36404cb. 
Additionally replaces the JSON test case output by their description --- benchmark/test/CMakeLists.txt | 4 +- benchmark/test/input.distributed_mtx.json | 7 + benchmark/test/multi_vector_distributed.py | 38 ++ benchmark/test/reference/blas.profile.stderr | 6 +- benchmark/test/reference/blas.simple.stderr | 6 +- .../test/reference/conversion.all.stderr | 7 +- .../test/reference/conversion.profile.stderr | 7 +- .../test/reference/conversion.simple.stderr | 7 +- .../distributed_solver.profile.stderr | 11 +- .../distributed_solver.simple.stderr | 11 +- .../reference/matrix_statistics.simple.stderr | 7 +- .../multi_vector_distributed.profile.stderr | 254 ++++++++++ .../multi_vector_distributed.profile.stdout | 29 ++ .../multi_vector_distributed.simple.stderr | 10 + .../multi_vector_distributed.simple.stdout | 29 ++ .../reference/preconditioner.profile.stderr | 7 +- .../reference/preconditioner.simple.stderr | 7 +- .../test/reference/solver.profile.stderr | 10 +- benchmark/test/reference/solver.simple.stderr | 10 +- .../test/reference/sparse_blas.profile.stderr | 7 +- .../test/reference/sparse_blas.simple.stderr | 7 +- benchmark/test/reference/spmv.profile.stderr | 7 +- benchmark/test/reference/spmv.simple.stderr | 7 +- .../reference/spmv_distributed.profile.stderr | 446 ++++++++++++++++++ .../reference/spmv_distributed.profile.stdout | 22 + .../reference/spmv_distributed.simple.stderr | 10 + .../reference/spmv_distributed.simple.stdout | 23 + benchmark/test/spmv_distributed.py | 42 ++ benchmark/test/test_framework.py.in | 2 +- benchmark/utils/general.hpp | 39 -- benchmark/utils/runner.hpp | 10 +- 31 files changed, 935 insertions(+), 154 deletions(-) create mode 100644 benchmark/test/input.distributed_mtx.json create mode 100644 benchmark/test/multi_vector_distributed.py create mode 100644 benchmark/test/reference/multi_vector_distributed.profile.stderr create mode 100644 benchmark/test/reference/multi_vector_distributed.profile.stdout create mode 100644 benchmark/test/reference/multi_vector_distributed.simple.stderr create mode 100644 benchmark/test/reference/multi_vector_distributed.simple.stdout create mode 100644 benchmark/test/reference/spmv_distributed.profile.stderr create mode 100644 benchmark/test/reference/spmv_distributed.profile.stdout create mode 100644 benchmark/test/reference/spmv_distributed.simple.stderr create mode 100644 benchmark/test/reference/spmv_distributed.simple.stdout create mode 100644 benchmark/test/spmv_distributed.py diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt index e1aab6dd75d..1cd589927fa 100644 --- a/benchmark/test/CMakeLists.txt +++ b/benchmark/test/CMakeLists.txt @@ -22,5 +22,7 @@ add_benchmark_test(solver) add_benchmark_test(sparse_blas) add_benchmark_test(spmv) if (GINKGO_BUILD_MPI) + add_benchmark_test(multi_vector_distributed) + add_benchmark_test(spmv_distributed) add_benchmark_test(solver_distributed) -endif() +endif() \ No newline at end of file diff --git a/benchmark/test/input.distributed_mtx.json b/benchmark/test/input.distributed_mtx.json new file mode 100644 index 00000000000..aca115179e6 --- /dev/null +++ b/benchmark/test/input.distributed_mtx.json @@ -0,0 +1,7 @@ +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil" + } +] \ No newline at end of file diff --git a/benchmark/test/multi_vector_distributed.py b/benchmark/test/multi_vector_distributed.py new file mode 100644 index 00000000000..1e0c4c8adf5 --- /dev/null +++ b/benchmark/test/multi_vector_distributed.py @@ -0,0 +1,38 @@ +#!/usr/bin/env 
python3 +import test_framework + +# check that all input modes work: +# parameter +test_framework.compare_output_distributed( + ["-input", '[{"n": 100}]'], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + num_procs=3, +) + +# stdin +test_framework.compare_output_distributed( + [], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) + +# file +test_framework.compare_output_distributed( + ["-input", str(test_framework.sourcepath / "input.blas.json")], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) + +# profiler annotations +test_framework.compare_output_distributed( + ["-input", '[{"n": 100}]', "-profile", "-profiler_hook", "debug"], + expected_stdout="multi_vector_distributed.profile.stdout", + expected_stderr="multi_vector_distributed.profile.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index b64f4321287..1313c85e462 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -4,11 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The operations are copy,axpy,scal -Running test case -{ - "n": 100, - "blas": {} -} +Running test case n = 100 DEBUG: begin n = 100 Running blas: copy DEBUG: begin copy diff --git a/benchmark/test/reference/blas.simple.stderr b/benchmark/test/reference/blas.simple.stderr index f41b25c6ee1..966ed597166 100644 --- a/benchmark/test/reference/blas.simple.stderr +++ b/benchmark/test/reference/blas.simple.stderr @@ -4,11 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are copy,axpy,scal -Running test case -{ - "n": 100, - "blas": {} -} +Running test case n = 100 Running blas: copy Running blas: axpy Running blas: scal diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr index 1d5df7477ba..77ff50a1b89 100644 --- a/benchmark/test/reference/conversion.all.stderr +++ b/benchmark/test/reference/conversion.all.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr,ell,sellp,hybrid -Running test case -{ - "size": 100, - "stencil": "7pt", - "conversion": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 089e6be02f9..6078dd3db2f 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case -{ - "size": 100, - "stencil": "7pt", - "conversion": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 DEBUG: begin stencil(100,7pt) Running conversion: coo-read diff --git a/benchmark/test/reference/conversion.simple.stderr 
b/benchmark/test/reference/conversion.simple.stderr index a814dba6888..9b51effac09 100644 --- a/benchmark/test/reference/conversion.simple.stderr +++ b/benchmark/test/reference/conversion.simple.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case -{ - "size": 100, - "stencil": "7pt", - "conversion": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index e583a1411a8..1daab773a38 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -5,16 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "comm_pattern": "stencil", - "optimal": { - "spmv": "csr-csr" - }, - "solver": {} -} +Running test case stencil(100,7pt,stencil) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size diff --git a/benchmark/test/reference/distributed_solver.simple.stderr b/benchmark/test/reference/distributed_solver.simple.stderr index 9feb7fa9522..607081a3949 100644 --- a/benchmark/test/reference/distributed_solver.simple.stderr +++ b/benchmark/test/reference/distributed_solver.simple.stderr @@ -5,15 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "comm_pattern": "stencil", - "optimal": { - "spmv": "csr-csr" - }, - "solver": {} -} +Running test case stencil(100,7pt,stencil) Matrix is of size (125, 125) Running solver: cg diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr b/benchmark/test/reference/matrix_statistics.simple.stderr index 75a7cca709f..d02edbc44da 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stderr +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -1,9 +1,4 @@ This is Ginkgo 1.7.0 (develop) running with core module 1.7.0 (develop) -Running test case -{ - "size": 100, - "stencil": "7pt", - "problem": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr new file mode 100644 index 00000000000..a77484daacb --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.profile.stderr @@ -0,0 +1,254 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scal +Running test case n = 100 +DEBUG: begin n = 100 + Running blas: copy +DEBUG: begin copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end 
allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end copy + Running blas: axpy +DEBUG: begin axpy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate 
+DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end axpy + Running blas: scal +DEBUG: begin scal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::scale +DEBUG: end dense::scale +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end scal +DEBUG: end n = 100 diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stdout b/benchmark/test/reference/multi_vector_distributed.profile.stdout new file mode 100644 index 00000000000..3a2e7e54f80 --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.profile.stdout @@ -0,0 +1,29 @@ + +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stderr b/benchmark/test/reference/multi_vector_distributed.simple.stderr new file mode 100644 index 00000000000..966ed597166 --- /dev/null +++ 
b/benchmark/test/reference/multi_vector_distributed.simple.stderr @@ -0,0 +1,10 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scal +Running test case n = 100 + Running blas: copy + Running blas: axpy + Running blas: scal diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stdout b/benchmark/test/reference/multi_vector_distributed.simple.stdout new file mode 100644 index 00000000000..08e692727fe --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.simple.stdout @@ -0,0 +1,29 @@ + +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index c215b22c925..def3a83993d 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case -{ - "size": 100, - "stencil": "7pt", - "preconditioner": {} -} +Running test case stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array diff --git a/benchmark/test/reference/preconditioner.simple.stderr b/benchmark/test/reference/preconditioner.simple.stderr index 07d2cca6704..0090e180d2b 100644 --- a/benchmark/test/reference/preconditioner.simple.stderr +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -4,11 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case -{ - "size": 100, - "stencil": "7pt", - "preconditioner": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running preconditioner: none diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 0c3f7060796..43ff852f68e 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -5,15 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "optimal": { - "spmv": "csr" - }, - "solver": {} -} +Running test case stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array diff --git a/benchmark/test/reference/solver.simple.stderr b/benchmark/test/reference/solver.simple.stderr index c5e4267a6bd..659dd026588 100644 --- a/benchmark/test/reference/solver.simple.stderr +++ b/benchmark/test/reference/solver.simple.stderr @@ -5,14 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case 
-{ - "size": 100, - "stencil": "7pt", - "optimal": { - "spmv": "csr" - }, - "solver": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125) Running solver: cg diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index d1434dad146..c47ce2a515b 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case -{ - "size": 100, - "stencil": "7pt", - "sparse_blas": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 DEBUG: begin allocate DEBUG: end allocate diff --git a/benchmark/test/reference/sparse_blas.simple.stderr b/benchmark/test/reference/sparse_blas.simple.stderr index 452374a9268..1f2bb34809f 100644 --- a/benchmark/test/reference/sparse_blas.simple.stderr +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -4,11 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case -{ - "size": 100, - "stencil": "7pt", - "sparse_blas": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running sparse_blas: transpose diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 09a10b725ea..4ff0125782f 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -5,12 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "spmv": {} -} +Running test case stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate diff --git a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr index a910512ff31..9d5047febb6 100644 --- a/benchmark/test/reference/spmv.simple.stderr +++ b/benchmark/test/reference/spmv.simple.stderr @@ -5,11 +5,6 @@ Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "spmv": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running spmv: coo diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr new file mode 100644 index 00000000000..95a07c8275c --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -0,0 +1,446 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The formats are [csr]x[csr] +The number of right hand sides is 1 +Running test case stencil(100,7pt,stencil) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: 
end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +Matrix is of size (81, 81), 144 +DEBUG: begin stencil(100,7pt,stencil) + Running spmv: csr-csr +DEBUG: begin csr-csr +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin 
partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate 
+DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: begin apply() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply() +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply() +DEBUG: begin advanced_apply() +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply() +DEBUG: end apply() +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end csr-csr +DEBUG: end stencil(100,7pt,stencil) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout b/benchmark/test/reference/spmv_distributed.profile.stdout new file mode 100644 index 00000000000..ebacddb887c --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.profile.stdout @@ -0,0 +1,22 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 6420, + "time": 1.0, + "repetitions": 1, + "completed": true + } + }, + "rows": 81, + "cols": 81, + "nonzeros": 144, + "optimal": { + "spmv": "csr-csr" + } + } +] diff --git a/benchmark/test/reference/spmv_distributed.simple.stderr b/benchmark/test/reference/spmv_distributed.simple.stderr new file mode 100644 index 00000000000..0df742d5b9b --- /dev/null +++ 
b/benchmark/test/reference/spmv_distributed.simple.stderr @@ -0,0 +1,10 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are [csr]x[csr] +The number of right hand sides is 1 +Running test case stencil(100,7pt,stencil) +Matrix is of size (81, 81), 144 + Running spmv: csr-csr diff --git a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout new file mode 100644 index 00000000000..64203476f91 --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.simple.stdout @@ -0,0 +1,23 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 6420, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "rows": 81, + "cols": 81, + "nonzeros": 144, + "optimal": { + "spmv": "csr-csr" + } + } +] diff --git a/benchmark/test/spmv_distributed.py b/benchmark/test/spmv_distributed.py new file mode 100644 index 00000000000..356db48459e --- /dev/null +++ b/benchmark/test/spmv_distributed.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +import test_framework + +# check that all input modes work: +# parameter +test_framework.compare_output_distributed( + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]'], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, +) + +# stdin +test_framework.compare_output_distributed( + [], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, + stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', +) + +# input file +test_framework.compare_output_distributed( + ["-input", str(test_framework.sourcepath / "input.distributed_mtx.json")], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, +) + +# profiler annotations +test_framework.compare_output_distributed( + [ + "-input", + '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', + "-profile", + "-profiler_hook", + "debug", + ], + expected_stdout="spmv_distributed.profile.stdout", + expected_stderr="spmv_distributed.profile.stderr", + num_procs=3, +) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index da1b0bfd618..faf898a21cb 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -247,7 +247,7 @@ def compare_output( def compare_output_distributed( args, expected_stdout, expected_stderr, num_procs, stdin="" ): - compare_output( + compare_output_impl( args, expected_stdout, expected_stderr, diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 41acb560ba1..1c48680f883 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -245,45 +245,6 @@ std::shared_ptr create_profiler_hook( } -struct owning_profiling_scope_guard { - std::string name; - gko::log::profiling_scope_guard guard; - - owning_profiling_scope_guard() = default; - - owning_profiling_scope_guard(std::string name_, - gko::log::ProfilerHook* profiler_hook) - : name(std::move(name_)), guard{profiler_hook->user_range(name.c_str())} - {} -}; - - -struct annotate_functor { - owning_profiling_scope_guard operator()(std::string name) const - { - if (profiler_hook) { - return 
owning_profiling_scope_guard{std::move(name), - profiler_hook.get()}; - } - return {}; - } - - gko::log::profiling_scope_guard operator()(const char* name) const - { - if (profiler_hook) { - return profiler_hook->user_range(name); - } - return {}; - } - - annotate_functor(std::shared_ptr profiler_hook) - : profiler_hook{std::move(profiler_hook)} - {} - - std::shared_ptr profiler_hook; -}; - - // Returns a random number engine std::default_random_engine& get_engine() { diff --git a/benchmark/utils/runner.hpp b/benchmark/utils/runner.hpp index 3520f7299ee..661c403706f 100644 --- a/benchmark/utils/runner.hpp +++ b/benchmark/utils/runner.hpp @@ -153,13 +153,13 @@ void run_test_cases(const Benchmark& benchmark, if (!test_case.contains(benchmark.get_name())) { test_case[benchmark.get_name()] = json::object(); } + auto test_case_desc = benchmark.describe_config(test_case); if (benchmark.should_print()) { - std::clog << "Running test case\n" - << std::setw(4) << test_case << std::endl; + std::clog << "Running test case " << test_case_desc + << std::endl; } auto test_case_state = benchmark.setup(exec, test_case); - auto test_case_str = benchmark.describe_config(test_case); - auto test_case_range = annotate(test_case_str.c_str()); + auto test_case_range = annotate(test_case_desc.c_str()); auto& benchmark_case = test_case[benchmark.get_name()]; for (const auto& operation_name : benchmark.get_operations()) { if (benchmark_case.contains(operation_name) && @@ -183,7 +183,7 @@ void run_test_cases(const Benchmark& benchmark, gko::name_demangling::get_dynamic_type(e); operation_case["error"] = e.what(); std::cerr << "Error when processing test case\n" - << std::setw(4) << test_case << "\n" + << test_case_desc << "\n" << "what(): " << e.what() << std::endl; } From 11134cdb9e240fc16ffe3cd7276a35cef0652a39 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 27 Jul 2023 23:44:32 +0200 Subject: [PATCH 246/583] handle JSON and non-JSON test output separately --- benchmark/test/reference/blas.profile.stdout | 3 +- benchmark/test/reference/blas.simple.stdout | 3 +- .../test/reference/conversion.all.stdout | 3 +- .../test/reference/conversion.profile.stdout | 3 +- .../test/reference/conversion.simple.stdout | 3 +- .../distributed_solver.profile.stdout | 3 +- .../distributed_solver.simple.stdout | 3 +- .../reference/matrix_statistics.simple.stdout | 3 +- .../multi_vector_distributed.profile.stdout | 3 +- .../multi_vector_distributed.simple.stdout | 3 +- .../reference/preconditioner.profile.stdout | 3 +- .../reference/preconditioner.simple.stdout | 3 +- .../test/reference/solver.profile.stdout | 3 +- benchmark/test/reference/solver.simple.stdout | 3 +- .../test/reference/sparse_blas.profile.stdout | 3 +- .../test/reference/sparse_blas.simple.stdout | 3 +- benchmark/test/reference/spmv.profile.stdout | 3 +- benchmark/test/reference/spmv.simple.stdout | 3 +- .../reference/spmv_distributed.profile.stdout | 3 +- .../reference/spmv_distributed.simple.stdout | 3 +- benchmark/test/test_framework.py.in | 78 ++++++------------- 21 files changed, 44 insertions(+), 94 deletions(-) diff --git a/benchmark/test/reference/blas.profile.stdout b/benchmark/test/reference/blas.profile.stdout index 3a2e7e54f80..8998d5eaed7 100644 --- a/benchmark/test/reference/blas.profile.stdout +++ b/benchmark/test/reference/blas.profile.stdout @@ -1,4 +1,3 @@ - [ { "n": 100, @@ -26,4 +25,4 @@ } } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/blas.simple.stdout b/benchmark/test/reference/blas.simple.stdout index 
08e692727fe..a586a9bc57b 100644 --- a/benchmark/test/reference/blas.simple.stdout +++ b/benchmark/test/reference/blas.simple.stdout @@ -1,4 +1,3 @@ - [ { "n": 100, @@ -26,4 +25,4 @@ } } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/conversion.all.stdout b/benchmark/test/reference/conversion.all.stdout index c4b657a42c4..0c77d464793 100644 --- a/benchmark/test/reference/conversion.all.stdout +++ b/benchmark/test/reference/conversion.all.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -74,4 +73,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/conversion.profile.stdout b/benchmark/test/reference/conversion.profile.stdout index b29815f6c17..a9c3ea674fa 100644 --- a/benchmark/test/reference/conversion.profile.stdout +++ b/benchmark/test/reference/conversion.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -29,4 +28,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/conversion.simple.stdout b/benchmark/test/reference/conversion.simple.stdout index 856f1330eea..81c735789d1 100644 --- a/benchmark/test/reference/conversion.simple.stdout +++ b/benchmark/test/reference/conversion.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -29,4 +28,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/distributed_solver.profile.stdout b/benchmark/test/reference/distributed_solver.profile.stdout index aef92652256..55dfb1dc428 100644 --- a/benchmark/test/reference/distributed_solver.profile.stdout +++ b/benchmark/test/reference/distributed_solver.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -31,4 +30,4 @@ "rows": 125, "cols": 125 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/distributed_solver.simple.stdout b/benchmark/test/reference/distributed_solver.simple.stdout index 002b9d91347..eed8d864388 100644 --- a/benchmark/test/reference/distributed_solver.simple.stdout +++ b/benchmark/test/reference/distributed_solver.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -57,4 +56,4 @@ "rows": 125, "cols": 125 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/matrix_statistics.simple.stdout b/benchmark/test/reference/matrix_statistics.simple.stdout index 13746ce8a46..923bbc9f962 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stdout +++ b/benchmark/test/reference/matrix_statistics.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -38,4 +37,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stdout b/benchmark/test/reference/multi_vector_distributed.profile.stdout index 3a2e7e54f80..8998d5eaed7 100644 --- a/benchmark/test/reference/multi_vector_distributed.profile.stdout +++ b/benchmark/test/reference/multi_vector_distributed.profile.stdout @@ -1,4 +1,3 @@ - [ { "n": 100, @@ -26,4 +25,4 @@ } } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stdout b/benchmark/test/reference/multi_vector_distributed.simple.stdout index 08e692727fe..a586a9bc57b 100644 --- a/benchmark/test/reference/multi_vector_distributed.simple.stdout +++ b/benchmark/test/reference/multi_vector_distributed.simple.stdout @@ -1,4 +1,3 @@ - [ { "n": 100, @@ -26,4 +25,4 @@ } } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/preconditioner.profile.stdout b/benchmark/test/reference/preconditioner.profile.stdout 
index f53407d818d..e33a6502eea 100644 --- a/benchmark/test/reference/preconditioner.profile.stdout +++ b/benchmark/test/reference/preconditioner.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -22,4 +21,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/preconditioner.simple.stdout b/benchmark/test/reference/preconditioner.simple.stdout index 92bb51ddb57..06291228a1c 100644 --- a/benchmark/test/reference/preconditioner.simple.stdout +++ b/benchmark/test/reference/preconditioner.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -30,4 +29,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/solver.profile.stdout b/benchmark/test/reference/solver.profile.stdout index 0148e6ef092..906c74de5e7 100644 --- a/benchmark/test/reference/solver.profile.stdout +++ b/benchmark/test/reference/solver.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -30,4 +29,4 @@ "rows": 125, "cols": 125 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/solver.simple.stdout b/benchmark/test/reference/solver.simple.stdout index b4e7b56b2bf..5d127fe4b78 100644 --- a/benchmark/test/reference/solver.simple.stdout +++ b/benchmark/test/reference/solver.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -54,4 +53,4 @@ "rows": 125, "cols": 125 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/sparse_blas.profile.stdout b/benchmark/test/reference/sparse_blas.profile.stdout index 848fb503ed4..e9d48fde23d 100644 --- a/benchmark/test/reference/sparse_blas.profile.stdout +++ b/benchmark/test/reference/sparse_blas.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -16,4 +15,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/sparse_blas.simple.stdout b/benchmark/test/reference/sparse_blas.simple.stdout index f39300ca35b..3cc5f774ebf 100644 --- a/benchmark/test/reference/sparse_blas.simple.stdout +++ b/benchmark/test/reference/sparse_blas.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -23,4 +22,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/spmv.profile.stdout b/benchmark/test/reference/spmv.profile.stdout index 5302d54f9f0..409a92d4e33 100644 --- a/benchmark/test/reference/spmv.profile.stdout +++ b/benchmark/test/reference/spmv.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -18,4 +17,4 @@ "spmv": "coo" } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/spmv.simple.stdout b/benchmark/test/reference/spmv.simple.stdout index 737938d7c96..9601a15b331 100644 --- a/benchmark/test/reference/spmv.simple.stdout +++ b/benchmark/test/reference/spmv.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -19,4 +18,4 @@ "spmv": "coo" } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout b/benchmark/test/reference/spmv_distributed.profile.stdout index ebacddb887c..8de6a68ae8a 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stdout +++ b/benchmark/test/reference/spmv_distributed.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -19,4 +18,4 @@ "spmv": "csr-csr" } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout index 64203476f91..f94e4b992a1 100644 --- a/benchmark/test/reference/spmv_distributed.simple.stdout +++ 
b/benchmark/test/reference/spmv_distributed.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -20,4 +19,4 @@ "spmv": "csr-csr" } } -] +] \ No newline at end of file diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index faf898a21cb..3deb282297a 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -22,7 +22,8 @@ denumberify_paths = [ "rhs_norm", "max_relative_norm2", ] -empty_string_paths = ["error", "filename"] +detypenameify_key_starts = ["generate(", "apply(", "advanced_apply(", "copy(", "check("] +empty_string_paths = ["filename"] empty_array_paths = [ "recurrent_residuals", "true_residuals", @@ -31,6 +32,18 @@ empty_array_paths = [ ] +def sanitize_json_key(key: str): + """Applies sanitation to a single key. + + Strings that start with a name in detypenameify_key_starts will be truncated + """ + + for start in detypenameify_key_starts: + if key.startswith(start): + return start + ")" + return key + + def sanitize_json_key_value(key: str, value, sanitize_all: bool): """Applies sanitation to a single key-value pair. @@ -59,7 +72,7 @@ def sanitize_json(parsed_input, sanitize_all: bool = False): if isinstance(parsed_input, dict): return { - key: sanitize_json_key_value(key, value, sanitize_all) + sanitize_json_key(key): sanitize_json_key_value(key, value, sanitize_all) for key, value in parsed_input.items() } elif isinstance(parsed_input, list): @@ -70,40 +83,15 @@ def sanitize_json(parsed_input, sanitize_all: bool = False): return parsed_input -def sanitize_json_in_text(lines: List[str]) -> List[str]: - """Sanitizes all occurrences of JSON content inside text input. +def determinize_json_text(input: str) -> List[str]: + """Sanitizes the given input JSON string. - Takes a list of text lines and detects any pretty-printed JSON output inside - (recognized by a single [, {, } or ] in an otherwise empty line). - The JSON output will be parsed and sanitized through sanitize_json(...) + The JSON values will be parsed and sanitized through sanitize_json(...) and pretty-printed to replace the original JSON input. - The function returns the resulting output. """ - json_begins = [i for i, l in enumerate(lines) if l in ["[", "{"]] - json_ends = [i + 1 for i, l in enumerate(lines) if l in ["]", "}"]] - json_pairs = list(zip(json_begins, json_ends)) - if len(json_pairs) == 0: - return lines - assert all(begin < end for begin, end in json_pairs) - nonjson_pairs = ( - [(0, json_begins[0])] - + list(zip(json_ends[:-1], json_begins[1:])) - + [(json_ends[-1], len(lines))] - ) - combined_pairs = sorted( - [(begin, end, False) for begin, end in nonjson_pairs] - + [(begin, end, True) for begin, end in json_pairs] - ) - texts = [ - ("\n".join(lines[begin:end]), do_sanitize) - for begin, end, do_sanitize in combined_pairs - ] - reconstructed = [ - json.dumps(sanitize_json(json.loads(t)), indent=4) if do_sanitize else t - for t, do_sanitize in texts - ] - return "\n".join(reconstructed).split("\n") + result = json.dumps(sanitize_json(json.loads(input)), indent=4) + return result.splitlines() def determinize_text( @@ -116,9 +104,6 @@ def determinize_text( Every input line matching an entry from ignore_patterns will be removed. Every line matching the first string in an entry from replace_patterns will be replaced by the second string. - Finally, the text will be passed to sanitize_json_in_text, which removes - nondeterministic parts from JSON objects/arrays in the input, - if it can be parsed correctly. 
The output is guaranteed to end with an empty line. """ @@ -137,10 +122,7 @@ def determinize_text( output_lines.append(line) if len(output_lines) == 0 or output_lines[-1] != "": output_lines.append("") - try: - return sanitize_json_in_text(output_lines) - except json.decoder.JSONDecodeError: - return output_lines + return output_lines def compare_output_impl( @@ -173,13 +155,7 @@ def compare_output_impl( ] if generate: open(expected_stdout, "w").write( - "\n".join( - determinize_text( - result.stdout.decode(), - ignore_patterns=[], - replace_patterns=typename_patterns, - ) - ) + "\n".join(determinize_json_text(result.stdout.decode())) ) open(expected_stderr, "w").write( "\n".join( @@ -192,19 +168,13 @@ def compare_output_impl( ) print("GENERATED") return - result_stdout_processed = determinize_text( - result.stdout.decode(), ignore_patterns=[], replace_patterns=typename_patterns - ) + result_stdout_processed = determinize_json_text(result.stdout.decode()) result_stderr_processed = determinize_text( result.stderr.decode(), ignore_patterns=version_patterns, replace_patterns=typename_patterns, ) - expected_stdout_processed = determinize_text( - open(expected_stdout).read(), - ignore_patterns=[], - replace_patterns=typename_patterns, - ) + expected_stdout_processed = determinize_json_text(open(expected_stdout).read()) expected_stderr_processed = determinize_text( open(expected_stderr).read(), ignore_patterns=version_patterns, From 306792aa148ffa366e05369574fba70e19178f7f Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 28 Jul 2023 00:15:00 +0200 Subject: [PATCH 247/583] benchmark reads on device_matrix_data --- benchmark/conversion/conversion.cpp | 11 +++-- .../test/reference/conversion.profile.stderr | 46 +++++++++++++++---- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/benchmark/conversion/conversion.cpp b/benchmark/conversion/conversion.cpp index b9a5d5c46d6..5f03cb2b933 100644 --- a/benchmark/conversion/conversion.cpp +++ b/benchmark/conversion/conversion.cpp @@ -60,7 +60,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
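(Editorial aside, not part of the patch: the hunk below switches the conversion benchmark from host-side gko::matrix_data to gko::device_matrix_data, so the matrix is transferred to the executor once and every format then reads from the device-side data. A minimal stand-alone sketch of that pattern, assuming the create_from_host/read API shown in the hunk and using a hypothetical 2x2 matrix on the reference executor:

    // sketch only: assemble matrix_data on the host, move it to the executor once,
    // then let a format read from the device-side data, as the benchmark now does
    #include <ginkgo/ginkgo.hpp>

    int main()
    {
        auto exec = gko::ReferenceExecutor::create();
        // small illustrative 2x2 matrix assembled on the host: {row, col, value}
        gko::matrix_data<double, int> host_data{{2, 2}, {{0, 0, 1.0}, {1, 1, 2.0}}};
        // single host-to-device transfer; subsequent reads stay on the executor
        auto device_data =
            gko::device_matrix_data<double, int>::create_from_host(exec, host_data);
        auto csr = gko::matrix::Csr<double, int>::create(exec);
        csr->read(device_data);
    }

This is why the reference stderr below gains copy events in place of the former components::aos_to_soa calls inside each *-read operation.)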
using Generator = DefaultSystemGenerator<>; -struct ConversionBenchmark : Benchmark> { +struct ConversionBenchmark : Benchmark> { std::string name; std::vector operations; @@ -112,8 +112,8 @@ struct ConversionBenchmark : Benchmark> { return Generator::describe_config(test_case); } - gko::matrix_data setup(std::shared_ptr exec, - json& test_case) const override + gko::device_matrix_data setup( + std::shared_ptr exec, json& test_case) const override { gko::matrix_data data; data = Generator::generate_matrix_data(test_case); @@ -122,12 +122,13 @@ struct ConversionBenchmark : Benchmark> { test_case["rows"] = data.size[0]; test_case["cols"] = data.size[1]; test_case["nonzeros"] = data.nonzeros.size(); - return data; + return gko::device_matrix_data::create_from_host(exec, + data); } void run(std::shared_ptr exec, std::shared_ptr timer, - gko::matrix_data& data, + gko::device_matrix_data& data, const std::string& operation_name, json& operation_case) const override { diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 6078dd3db2f..ca80375c5bf 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -6,17 +6,29 @@ The random seed for right hand sides is 42 The formats are coo,csr Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa DEBUG: begin stencil(100,7pt) Running conversion: coo-read DEBUG: begin coo-read DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa +DEBUG: begin copy +DEBUG: end copy DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -28,12 +40,16 @@ DEBUG: end coo-read DEBUG: begin coo-csr DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array @@ -75,12 +91,16 @@ DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free @@ -104,12 +124,16 @@ DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free @@ -146,3 +170,9 @@ DEBUG: begin free DEBUG: end free DEBUG: end csr-coo DEBUG: end stencil(100,7pt) +DEBUG: begin free +DEBUG: end free +DEBUG: begin 
free +DEBUG: end free +DEBUG: begin free +DEBUG: end free From 6a9e59de9c0e8726a01efc2ddc3645df5ce5c680 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 17 Aug 2023 21:17:05 +0200 Subject: [PATCH 248/583] remove allocations from output they are sometimes implementation-dependent for libstdc++ types --- benchmark/test/reference/blas.profile.stderr | 28 -- .../test/reference/conversion.profile.stderr | 104 ------ .../distributed_solver.profile.stderr | 232 ------------- .../multi_vector_distributed.profile.stderr | 128 -------- .../reference/preconditioner.profile.stderr | 44 --- .../test/reference/solver.profile.stderr | 132 -------- .../test/reference/sparse_blas.profile.stderr | 36 -- benchmark/test/reference/spmv.profile.stderr | 48 --- .../reference/spmv_distributed.profile.stderr | 308 ------------------ benchmark/test/test_framework.py.in | 11 +- 10 files changed, 6 insertions(+), 1065 deletions(-) diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index 1313c85e462..529fc16009c 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -8,27 +8,13 @@ Running test case n = 100 DEBUG: begin n = 100 Running blas: copy DEBUG: begin copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end copy Running blas: axpy DEBUG: begin axpy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill @@ -37,28 +23,14 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::add_scaled DEBUG: end dense::add_scaled -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end axpy Running blas: scal DEBUG: begin scal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::scale DEBUG: end dense::scale -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end scal DEBUG: end n = 100 diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index ca80375c5bf..a233579c721 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -6,173 +6,69 @@ The random seed for right hand sides is 42 The formats are coo,csr Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin stencil(100,7pt) Running conversion: coo-read DEBUG: begin coo-read -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end coo-read Running conversion: coo-csr DEBUG: begin coo-csr -DEBUG: begin 
allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: end copy() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end coo-csr Running conversion: csr-read DEBUG: begin csr-read -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end csr-read Running conversion: csr-coo DEBUG: begin csr-coo -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::convert_ptrs_to_idxs DEBUG: end components::convert_ptrs_to_idxs DEBUG: end copy() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end csr-coo DEBUG: end stencil(100,7pt) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index 1daab773a38..4ea20730117 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -6,18 +6,8 @@ The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 Running test case stencil(100,7pt,stencil) -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin 
partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -32,18 +22,10 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin copy() @@ -56,8 +38,6 @@ DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin copy() @@ -70,135 +50,29 @@ DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin distributed_matrix::build_local_nonlocal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: end distributed_matrix::build_local_nonlocal DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: 
end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -206,62 +80,30 @@ Matrix is of size (125, 125) DEBUG: begin stencil(100,7pt,stencil) Running solver: cg DEBUG: begin cg -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() -DEBUG: begin free -DEBUG: end free DEBUG: begin generate() DEBUG: begin generate() DEBUG: end generate() DEBUG: end generate() DEBUG: begin apply() DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin cg::initialize DEBUG: end cg::initialize DEBUG: begin advanced_apply() @@ -276,20 +118,10 @@ DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin apply() DEBUG: begin copy() DEBUG: begin dense::copy @@ -586,25 +418,9 @@ DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm DEBUG: end check() DEBUG: end check() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end iteration DEBUG: end apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -620,59 +436,11 @@ DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin 
copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end cg DEBUG: end stencil(100,7pt,stencil) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr index a77484daacb..102330e38f4 100644 --- a/benchmark/test/reference/multi_vector_distributed.profile.stderr +++ b/benchmark/test/reference/multi_vector_distributed.profile.stderr @@ -8,18 +8,8 @@ Running test case n = 100 DEBUG: begin n = 100 Running blas: copy DEBUG: begin copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -34,32 +24,10 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -74,45 +42,17 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end copy Running blas: axpy DEBUG: begin axpy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: 
begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -127,32 +67,10 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -167,20 +85,8 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill @@ -189,29 +95,11 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::add_scaled DEBUG: end dense::add_scaled -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end axpy Running blas: scal DEBUG: begin scal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -226,29 +114,13 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::scale DEBUG: end dense::scale -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end scal DEBUG: end n = 100 diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index def3a83993d..610dfe464ec 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -5,50 +5,20 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running with preconditioners: none Running test case stencil(100,7pt) -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate 
-DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data @@ -58,8 +28,6 @@ DEBUG: begin stencil(100,7pt) Running preconditioner: none DEBUG: begin none DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -71,17 +39,5 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() DEBUG: end apply() -DEBUG: begin free -DEBUG: end free DEBUG: end none DEBUG: end stencil(100,7pt) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 43ff852f68e..238591eb0c9 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -6,49 +6,19 @@ The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 Running test case stencil(100,7pt) -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -56,78 +26,36 @@ Matrix is of size (125, 125) DEBUG: begin stencil(100,7pt) Running solver: cg DEBUG: begin cg -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate 
DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() -DEBUG: begin free -DEBUG: end free DEBUG: begin generate() DEBUG: begin generate() DEBUG: end generate() DEBUG: end generate() DEBUG: begin apply() DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin cg::initialize DEBUG: end cg::initialize DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin apply() DEBUG: begin copy() DEBUG: begin dense::copy @@ -352,25 +280,9 @@ DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm DEBUG: end check() DEBUG: end check() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end iteration DEBUG: end apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -378,53 +290,9 @@ DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end cg DEBUG: end stencil(100,7pt) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index c47ce2a515b..60cf41ccbae 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -6,54 +6,18 @@ The random seed for right hand sides is 42 The operations are transpose Running test case 
stencil(100,7pt) Matrix is of size (125, 125), 725 -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free DEBUG: begin stencil(100,7pt) Running sparse_blas: transpose DEBUG: begin transpose -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin csr::transpose DEBUG: end csr::transpose -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end transpose DEBUG: end stencil(100,7pt) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 4ff0125782f..2299614c6c4 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -6,61 +6,25 @@ The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 Running test case stencil(100,7pt) -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free Matrix is of size (125, 125), 725 DEBUG: begin stencil(100,7pt) Running spmv: coo DEBUG: begin coo -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -68,17 +32,5 @@ DEBUG: begin apply() DEBUG: begin coo::spmv DEBUG: end coo::spmv DEBUG: end apply() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end coo DEBUG: end stencil(100,7pt) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr index 
95a07c8275c..b44cef7f3f6 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stderr +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -6,18 +6,8 @@ The random seed for right hand sides is 42 The formats are [csr]x[csr] The number of right hand sides is 1 Running test case stencil(100,7pt,stencil) -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -32,50 +22,16 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -90,54 +46,20 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free Matrix is of size (81, 81), 144 DEBUG: begin stencil(100,7pt,stencil) Running spmv: csr-csr DEBUG: begin csr-csr -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -152,18 +74,10 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: 
end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin copy() @@ -176,8 +90,6 @@ DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin copy() @@ -190,219 +102,27 @@ DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin distributed_matrix::build_local_nonlocal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: 
begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end distributed_matrix::build_local_nonlocal DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() DEBUG: begin apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::row_gather DEBUG: end dense::row_gather DEBUG: begin apply() @@ -414,33 +134,5 @@ DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() DEBUG: end apply() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end csr-csr DEBUG: end stencil(100,7pt,stencil) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 3deb282297a..014d3cb41a5 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -146,8 +146,9 @@ def compare_output_impl( " ".join(["'{}'".format(arg) for arg in launcher_flags + args]) ) ) - version_patterns = [ - " the .* module is", + ignore_patterns = [ + " the .* module is", # version numbers + "DEBUG: (begin|end ) (allocate|free)", # allocations ] typename_patterns = [ ("(apply|generate|check|copy|move)\([^())]*\)", "\\1()"), @@ -161,7 +162,7 @@ def compare_output_impl( "\n".join( determinize_text( result.stderr.decode(), - ignore_patterns=version_patterns, + ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, ) ) @@ -171,13 +172,13 @@ def compare_output_impl( result_stdout_processed = determinize_json_text(result.stdout.decode()) result_stderr_processed = determinize_text( result.stderr.decode(), - ignore_patterns=version_patterns, + ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, ) expected_stdout_processed = 
determinize_json_text(open(expected_stdout).read()) expected_stderr_processed = determinize_text( open(expected_stderr).read(), - ignore_patterns=version_patterns, + ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, ) failed = False From 3e0da3b3a580b185f1e0f335bc22db9693631620 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 17 Aug 2023 21:32:01 +0200 Subject: [PATCH 249/583] update matrix outputs --- benchmark/test/reference/conversion.matrix.stderr | 6 +----- benchmark/test/reference/conversion.matrix.stdout | 3 +-- .../test/reference/distributed_solver.matrix.stderr | 9 +-------- .../test/reference/distributed_solver.matrix.stdout | 3 +-- benchmark/test/reference/matrix_statistics.matrix.stderr | 6 +----- benchmark/test/reference/matrix_statistics.matrix.stdout | 3 +-- benchmark/test/reference/preconditioner.matrix.stderr | 6 +----- benchmark/test/reference/preconditioner.matrix.stdout | 3 +-- benchmark/test/reference/solver.matrix.stderr | 9 +-------- benchmark/test/reference/solver.matrix.stdout | 3 +-- benchmark/test/reference/sparse_blas.matrix.stderr | 6 +----- benchmark/test/reference/sparse_blas.matrix.stdout | 3 +-- benchmark/test/reference/spmv.matrix.stderr | 6 +----- benchmark/test/reference/spmv.matrix.stdout | 3 +-- benchmark/test/test_framework.py.in | 1 + 15 files changed, 15 insertions(+), 55 deletions(-) diff --git a/benchmark/test/reference/conversion.matrix.stderr b/benchmark/test/reference/conversion.matrix.stderr index 369a363a53e..5e7bd1cce24 100644 --- a/benchmark/test/reference/conversion.matrix.stderr +++ b/benchmark/test/reference/conversion.matrix.stderr @@ -4,11 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case -{ - "filename": "", - "conversion": {} -} +Running test case Matrix is of size (36, 36), 208 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/conversion.matrix.stdout b/benchmark/test/reference/conversion.matrix.stdout index 7e537fa4919..7f27b0c25b3 100644 --- a/benchmark/test/reference/conversion.matrix.stdout +++ b/benchmark/test/reference/conversion.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -28,4 +27,4 @@ "cols": 36, "nonzeros": 208 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/distributed_solver.matrix.stderr b/benchmark/test/reference/distributed_solver.matrix.stderr index 4f0c6b22edd..cd2bb49261c 100644 --- a/benchmark/test/reference/distributed_solver.matrix.stderr +++ b/benchmark/test/reference/distributed_solver.matrix.stderr @@ -5,13 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "filename": "", - "optimal": { - "spmv": "csr-csr" - }, - "solver": {} -} +Running test case Matrix is of size (36, 36) Running solver: cg diff --git a/benchmark/test/reference/distributed_solver.matrix.stdout b/benchmark/test/reference/distributed_solver.matrix.stdout index cd3c7b8bd43..ec1d258e2f4 100644 --- a/benchmark/test/reference/distributed_solver.matrix.stdout +++ b/benchmark/test/reference/distributed_solver.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -55,4 +54,4 @@ "rows": 36, "cols": 36 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/matrix_statistics.matrix.stderr 
b/benchmark/test/reference/matrix_statistics.matrix.stderr index 7bb33842f25..0b31ef3a888 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stderr +++ b/benchmark/test/reference/matrix_statistics.matrix.stderr @@ -1,8 +1,4 @@ This is Ginkgo 1.7.0 (develop) running with core module 1.7.0 (develop) -Running test case -{ - "filename": "", - "problem": {} -} +Running test case Matrix is of size (36, 36), 208 diff --git a/benchmark/test/reference/matrix_statistics.matrix.stdout b/benchmark/test/reference/matrix_statistics.matrix.stdout index ea73587fde4..a6297e89b66 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stdout +++ b/benchmark/test/reference/matrix_statistics.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -37,4 +36,4 @@ "cols": 36, "nonzeros": 208 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/preconditioner.matrix.stderr b/benchmark/test/reference/preconditioner.matrix.stderr index 4088a20c925..7452ab91b3a 100644 --- a/benchmark/test/reference/preconditioner.matrix.stderr +++ b/benchmark/test/reference/preconditioner.matrix.stderr @@ -4,10 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case -{ - "filename": "", - "preconditioner": {} -} +Running test case Matrix is of size (36, 36), 208 Running preconditioner: none diff --git a/benchmark/test/reference/preconditioner.matrix.stdout b/benchmark/test/reference/preconditioner.matrix.stdout index 0415a87ea8d..51adb7383c3 100644 --- a/benchmark/test/reference/preconditioner.matrix.stdout +++ b/benchmark/test/reference/preconditioner.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -29,4 +28,4 @@ "cols": 36, "nonzeros": 208 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/solver.matrix.stderr b/benchmark/test/reference/solver.matrix.stderr index 8a1ea117314..cd2bb49261c 100644 --- a/benchmark/test/reference/solver.matrix.stderr +++ b/benchmark/test/reference/solver.matrix.stderr @@ -5,13 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "filename": "", - "optimal": { - "spmv": "csr" - }, - "solver": {} -} +Running test case Matrix is of size (36, 36) Running solver: cg diff --git a/benchmark/test/reference/solver.matrix.stdout b/benchmark/test/reference/solver.matrix.stdout index 56577288c2d..a87e78f7f66 100644 --- a/benchmark/test/reference/solver.matrix.stdout +++ b/benchmark/test/reference/solver.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -53,4 +52,4 @@ "rows": 36, "cols": 36 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/sparse_blas.matrix.stderr b/benchmark/test/reference/sparse_blas.matrix.stderr index ff52b6a3269..483429fd71d 100644 --- a/benchmark/test/reference/sparse_blas.matrix.stderr +++ b/benchmark/test/reference/sparse_blas.matrix.stderr @@ -4,10 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case -{ - "filename": "", - "sparse_blas": {} -} +Running test case Matrix is of size (36, 36), 208 Running sparse_blas: transpose diff --git a/benchmark/test/reference/sparse_blas.matrix.stdout b/benchmark/test/reference/sparse_blas.matrix.stdout index 
4a64c8ea1ce..74fdbf98e7a 100644 --- a/benchmark/test/reference/sparse_blas.matrix.stdout +++ b/benchmark/test/reference/sparse_blas.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -22,4 +21,4 @@ "cols": 36, "nonzeros": 208 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/spmv.matrix.stderr b/benchmark/test/reference/spmv.matrix.stderr index a618da5b321..45beba6cafb 100644 --- a/benchmark/test/reference/spmv.matrix.stderr +++ b/benchmark/test/reference/spmv.matrix.stderr @@ -5,10 +5,6 @@ Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case -{ - "filename": "", - "spmv": {} -} +Running test case Matrix is of size (36, 36), 208 Running spmv: coo diff --git a/benchmark/test/reference/spmv.matrix.stdout b/benchmark/test/reference/spmv.matrix.stdout index dc30ab6b284..4d03ce3cd07 100644 --- a/benchmark/test/reference/spmv.matrix.stdout +++ b/benchmark/test/reference/spmv.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -18,4 +17,4 @@ "spmv": "coo" } } -] +] \ No newline at end of file diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 014d3cb41a5..6e3092bde6c 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -153,6 +153,7 @@ def compare_output_impl( typename_patterns = [ ("(apply|generate|check|copy|move)\([^())]*\)", "\\1()"), ("what\\(\\): .*", "what(): "), + (re.escape(str(matrixpath)), ""), ] if generate: open(expected_stdout, "w").write( From 8ed05956a03696c59f0568944db2768d2d9998ed Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 19 Aug 2023 11:27:22 +0200 Subject: [PATCH 250/583] review updates - rename 'determinize' -> 'sanitize' - use empty struct for empty benchmark state - use version tag instead of commit ID - use std::endl where appropriate Co-authored-by: Marcel Koch --- .../matrix_statistics/matrix_statistics.cpp | 5 +++- benchmark/test/test_framework.py.in | 30 +++++++++++-------- benchmark/utils/general.hpp | 2 +- third_party/nlohmann_json/CMakeLists.txt | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index 40c505c7627..4bb63032550 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -149,7 +149,10 @@ void extract_matrix_statistics(gko::matrix_data& data, using Generator = DefaultSystemGenerator; -struct MatrixStatistics : Benchmark { +struct empty_state {}; + + +struct MatrixStatistics : Benchmark { std::string name; std::vector empty; diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 6e3092bde6c..1a07818df1f 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -22,7 +22,8 @@ denumberify_paths = [ "rhs_norm", "max_relative_norm2", ] -detypenameify_key_starts = ["generate(", "apply(", "advanced_apply(", "copy(", "check("] +detypenameify_key_starts = [ + "generate(", "apply(", "advanced_apply(", "copy(", "check("] empty_string_paths = ["filename"] empty_array_paths = [ "recurrent_residuals", @@ -44,7 +45,7 @@ def sanitize_json_key(key: str): return key -def sanitize_json_key_value(key: str, value, sanitize_all: bool): +def sanitize_json_value(key: str, value, sanitize_all: bool): """Applies sanitation to a single key-value pair. 
Strings with a key in empty_string_paths will be emptied @@ -72,7 +73,7 @@ def sanitize_json(parsed_input, sanitize_all: bool = False): if isinstance(parsed_input, dict): return { - sanitize_json_key(key): sanitize_json_key_value(key, value, sanitize_all) + sanitize_json_key(key): sanitize_json_value(key, value, sanitize_all) for key, value in parsed_input.items() } elif isinstance(parsed_input, list): @@ -83,7 +84,7 @@ def sanitize_json(parsed_input, sanitize_all: bool = False): return parsed_input -def determinize_json_text(input: str) -> List[str]: +def sanitize_json_text(input: str) -> List[str]: """Sanitizes the given input JSON string. The JSON values will be parsed and sanitized through sanitize_json(...) @@ -94,7 +95,7 @@ def determinize_json_text(input: str) -> List[str]: return result.splitlines() -def determinize_text( +def sanitize_text( input: str, ignore_patterns: List[str], replace_patterns: List[Tuple[str, str]], @@ -157,11 +158,11 @@ def compare_output_impl( ] if generate: open(expected_stdout, "w").write( - "\n".join(determinize_json_text(result.stdout.decode())) + "\n".join(sanitize_json_text(result.stdout.decode())) ) open(expected_stderr, "w").write( "\n".join( - determinize_text( + sanitize_text( result.stderr.decode(), ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, @@ -170,14 +171,15 @@ def compare_output_impl( ) print("GENERATED") return - result_stdout_processed = determinize_json_text(result.stdout.decode()) - result_stderr_processed = determinize_text( + result_stdout_processed = sanitize_json_text(result.stdout.decode()) + result_stderr_processed = sanitize_text( result.stderr.decode(), ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, ) - expected_stdout_processed = determinize_json_text(open(expected_stdout).read()) - expected_stderr_processed = determinize_text( + expected_stdout_processed = sanitize_json_text( + open(expected_stdout).read()) + expected_stderr_processed = sanitize_text( open(expected_stderr).read(), ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, @@ -187,7 +189,8 @@ def compare_output_impl( print("FAIL: stdout differs") print( "\n".join( - difflib.unified_diff(expected_stdout_processed, result_stdout_processed) + difflib.unified_diff( + expected_stdout_processed, result_stdout_processed) ) ) failed = True @@ -195,7 +198,8 @@ def compare_output_impl( print("FAIL: stderr differs") print( "\n".join( - difflib.unified_diff(expected_stderr_processed, result_stderr_processed) + difflib.unified_diff( + expected_stderr_processed, result_stderr_processed) ) ) failed = True diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 1c48680f883..550f6fe2720 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -216,7 +216,7 @@ void print_general_information(const std::string& extra) } std::clog << "The random seed for right hand sides is " << FLAGS_seed << '\n' - << extra << '\n'; + << extra << std::endl; } diff --git a/third_party/nlohmann_json/CMakeLists.txt b/third_party/nlohmann_json/CMakeLists.txt index 77064c66c40..b95cfa5606a 100644 --- a/third_party/nlohmann_json/CMakeLists.txt +++ b/third_party/nlohmann_json/CMakeLists.txt @@ -3,7 +3,7 @@ include(FetchContent) FetchContent_Declare( nlohmann_json GIT_REPOSITORY https://github.com/nlohmann/json.git - GIT_TAG bc889afb4c5bf1c0d8ee29ef35eaaf4c8bef8a5d + GIT_TAG v3.9.1 ) set(JSON_BuildTests OFF CACHE INTERNAL "") FetchContent_MakeAvailable(nlohmann_json) From 
5fe683d85624fe376569fd8b98ee762eef037add Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 19 Aug 2023 12:19:00 +0200 Subject: [PATCH 251/583] annotate repetitions --- benchmark/blas/blas_common.hpp | 17 +++++++----- benchmark/conversion/conversion.cpp | 26 +++++++++++++------ .../matrix_statistics/matrix_statistics.cpp | 9 ++++--- benchmark/preconditioner/preconditioner.cpp | 13 +++++++--- benchmark/solver/solver_common.hpp | 21 +++++++++------ benchmark/sparse_blas/sparse_blas.cpp | 17 +++++++----- benchmark/spmv/spmv_common.hpp | 16 +++++++----- benchmark/utils/general.hpp | 26 +++++++++++++++++++ benchmark/utils/runner.hpp | 16 ++++-------- 9 files changed, 109 insertions(+), 52 deletions(-) diff --git a/benchmark/blas/blas_common.hpp b/benchmark/blas/blas_common.hpp index 88819a043b0..1267dc57c15 100644 --- a/benchmark/blas/blas_common.hpp +++ b/benchmark/blas/blas_common.hpp @@ -489,7 +489,8 @@ struct BlasBenchmark : Benchmark { void run(std::shared_ptr exec, std::shared_ptr timer, - dimensions& dims, const std::string& operation_name, + annotate_functor annotate, dimensions& dims, + const std::string& operation_name, json& operation_case) const override { auto op = operation_map.at(operation_name)(exec, dims); @@ -497,16 +498,20 @@ struct BlasBenchmark : Benchmark { IterationControl ic(timer); // warm run - for (auto _ : ic.warmup_run()) { - op->prepare(); - exec->synchronize(); - op->run(); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + op->prepare(); + exec->synchronize(); + op->run(); + exec->synchronize(); + } } // timed run op->prepare(); for (auto _ : ic.run()) { + auto range = annotate("repetition"); op->run(); } const auto runtime = ic.compute_time(FLAGS_timer_method); diff --git a/benchmark/conversion/conversion.cpp b/benchmark/conversion/conversion.cpp index 5f03cb2b933..c777db1a35a 100644 --- a/benchmark/conversion/conversion.cpp +++ b/benchmark/conversion/conversion.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/utils/formats.hpp" +#include "benchmark/utils/general.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/iteration_control.hpp" @@ -128,6 +129,7 @@ struct ConversionBenchmark : Benchmark> { void run(std::shared_ptr exec, std::shared_ptr timer, + annotate_functor annotate, gko::device_matrix_data& data, const std::string& operation_name, json& operation_case) const override @@ -142,13 +144,17 @@ struct ConversionBenchmark : Benchmark> { IterationControl ic{timer}; if (to_name == "read") { // warm run - for (auto _ : ic.warmup_run()) { - exec->synchronize(); - readable->read(data); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + exec->synchronize(); + readable->read(data); + exec->synchronize(); + } } // timed run for (auto _ : ic.run()) { + auto range = annotate("repetition"); readable->read(data); } } else { @@ -156,13 +162,17 @@ struct ConversionBenchmark : Benchmark> { auto mtx_to = formats::matrix_type_factory.at(to_name)(exec); // warm run - for (auto _ : ic.warmup_run()) { - exec->synchronize(); - mtx_to->copy_from(mtx_from); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + exec->synchronize(); + mtx_to->copy_from(mtx_from); + exec->synchronize(); + } } // timed run for (auto _ : ic.run()) { + auto range = annotate("repetition"); mtx_to->copy_from(mtx_from); } } diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index 4bb63032550..20feecf5ccf 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -182,8 +182,8 @@ struct MatrixStatistics : Benchmark { return Generator::describe_config(test_case); } - int setup(std::shared_ptr exec, - json& test_case) const override + empty_state setup(std::shared_ptr exec, + json& test_case) const override { auto data = Generator::generate_matrix_data(test_case); std::clog << "Matrix is of size (" << data.size[0] << ", " @@ -193,12 +193,13 @@ struct MatrixStatistics : Benchmark { test_case["nonzeros"] = data.nonzeros.size(); extract_matrix_statistics(data, test_case["problem"]); - return 0; + return {}; } void run(std::shared_ptr exec, std::shared_ptr timer, - int& data, const std::string& operation_name, + annotate_functor annotate, empty_state& data, + const std::string& operation_name, json& operation_case) const override {} }; diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 7c130328d34..98f116f9b12 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/utils/formats.hpp" +#include "benchmark/utils/general.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/iteration_control.hpp" @@ -199,7 +200,7 @@ struct PreconditionerBenchmark : Benchmark { void run(std::shared_ptr exec, std::shared_ptr timer, - preconditioner_benchmark_state& state, + annotate_functor annotate, preconditioner_benchmark_state& state, const std::string& encoded_precond_name, json& precond_case) const override { @@ -219,12 +220,17 @@ struct PreconditionerBenchmark : Benchmark { auto precond = precond_factory.at(decoded_precond_name)(exec); - for (auto _ : ic_apply.warmup_run()) { - precond->generate(state.system_matrix)->apply(state.b, x_clone); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic_apply.warmup_run()) { + precond->generate(state.system_matrix) + ->apply(state.b, x_clone); + } } std::unique_ptr precond_op; for (auto _ : ic_gen.run()) { + auto range = annotate("repetition generate"); precond_op = precond->generate(state.system_matrix); } @@ -234,6 +240,7 @@ struct PreconditionerBenchmark : Benchmark { ic_gen.get_num_repetitions(); for (auto _ : ic_apply.run()) { + auto range = annotate("repetition apply"); precond_op->apply(state.b, x_clone); } diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index 4976e5759d4..597ab76729a 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -458,6 +458,7 @@ struct SolverBenchmark : Benchmark> { void run(std::shared_ptr exec, std::shared_ptr timer, + annotate_functor annotate, solver_benchmark_state& state, const std::string& encoded_solver_name, json& solver_case) const override @@ -482,14 +483,17 @@ struct SolverBenchmark : Benchmark> { // warm run std::shared_ptr solver; - for (auto _ : ic.warmup_run()) { - auto x_clone = clone(state.x); - auto precond = precond_factory.at(precond_name)(exec); - solver = generate_solver(exec, give(precond), solver_name, - FLAGS_warmup_max_iters) - ->generate(state.system_matrix); - solver->apply(state.b, x_clone); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + auto x_clone = clone(state.x); + auto precond = precond_factory.at(precond_name)(exec); + solver = generate_solver(exec, give(precond), solver_name, + FLAGS_warmup_max_iters) + ->generate(state.system_matrix); + solver->apply(state.b, x_clone); + exec->synchronize(); + } } // detail run @@ -566,6 +570,7 @@ struct SolverBenchmark : Benchmark> { auto apply_timer = ic.get_timer(); auto x_clone = clone(state.x); for (auto status : ic.run(false)) { + auto range = annotate("repetition"); x_clone = clone(state.x); exec->synchronize(); diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index 21df4d9c448..5d479eb7fc0 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -128,7 +128,8 @@ struct SparseBlasBenchmark : Benchmark> { void run(std::shared_ptr exec, std::shared_ptr timer, - std::unique_ptr& mtx, const std::string& operation_name, + annotate_functor annotate, std::unique_ptr& mtx, + const std::string& operation_name, json& operation_case) const override { auto op = get_operation(operation_name, mtx.get()); @@ -136,16 +137,20 @@ struct SparseBlasBenchmark : Benchmark> { IterationControl ic(timer); // warm run - for (auto _ : ic.warmup_run()) { - op->prepare(); - exec->synchronize(); - op->run(); - 
exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + op->prepare(); + exec->synchronize(); + op->run(); + exec->synchronize(); + } } // timed run op->prepare(); for (auto _ : ic.run()) { + auto range = annotate("repetition"); op->run(); } const auto runtime = ic.compute_time(FLAGS_timer_method); diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index 4a7d014de8b..f589077834e 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -130,7 +130,7 @@ struct SpmvBenchmark : Benchmark> { } void run(std::shared_ptr exec, std::shared_ptr timer, - spmv_benchmark_state& state, + annotate_functor annotate, spmv_benchmark_state& state, const std::string& format_name, json& format_case) const override { auto system_matrix = generator.generate_matrix_with_format( @@ -149,11 +149,14 @@ struct SpmvBenchmark : Benchmark> { IterationControl ic{timer}; // warm run - for (auto _ : ic.warmup_run()) { - auto x_clone = clone(state.x); - exec->synchronize(); - system_matrix->apply(state.b, x_clone); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + auto x_clone = clone(state.x); + exec->synchronize(); + system_matrix->apply(state.b, x_clone); + exec->synchronize(); + } } // tuning run @@ -192,6 +195,7 @@ struct SpmvBenchmark : Benchmark> { // timed run auto x_clone = clone(state.x); for (auto _ : ic.run()) { + auto range = annotate("repetition"); system_matrix->apply(state.b, x_clone); } format_case["time"] = ic.compute_time(FLAGS_timer_method); diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 550f6fe2720..6012cb6c77b 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -245,6 +245,32 @@ std::shared_ptr create_profiler_hook( } +struct annotate_functor { + gko::log::profiling_scope_guard operator()(const char* name) const + { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + } + + gko::log::profiling_scope_guard operator()(const char* name, + bool should_annotate) const + { + if (profiler_hook && should_annotate) { + return profiler_hook->user_range(name); + } + return {}; + } + + annotate_functor(std::shared_ptr profiler_hook) + : profiler_hook{std::move(profiler_hook)} + {} + + std::shared_ptr profiler_hook; +}; + + // Returns a random number engine std::default_random_engine& get_engine() { diff --git a/benchmark/utils/runner.hpp b/benchmark/utils/runner.hpp index 661c403706f..264dc3965db 100644 --- a/benchmark/utils/runner.hpp +++ b/benchmark/utils/runner.hpp @@ -102,8 +102,8 @@ struct Benchmark { /** Runs a single operation of the benchmark */ virtual void run(std::shared_ptr exec, - std::shared_ptr timer, State& state, - const std::string& operation, + std::shared_ptr timer, annotate_functor annotate, + State& state, const std::string& operation, json& operation_case) const = 0; /** Post-process test case info. 
*/ @@ -139,13 +139,7 @@ void run_test_cases(const Benchmark& benchmark, if (profiler_hook) { exec->add_logger(profiler_hook); } - auto annotate = - [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { - if (profiler_hook) { - return profiler_hook->user_range(name); - } - return {}; - }; + auto annotate = annotate_functor(profiler_hook); for (auto& test_case : test_cases) { try { @@ -174,8 +168,8 @@ void run_test_cases(const Benchmark& benchmark, auto& operation_case = benchmark_case[operation_name]; try { auto operation_range = annotate(operation_name.c_str()); - benchmark.run(exec, timer, test_case_state, operation_name, - operation_case); + benchmark.run(exec, timer, annotate, test_case_state, + operation_name, operation_case); operation_case["completed"] = true; } catch (const std::exception& e) { operation_case["completed"] = false; From 10ef14a67bb6240e2cdcc5266a4568833eff3cb8 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 19 Aug 2023 12:19:10 +0200 Subject: [PATCH 252/583] update test output --- benchmark/test/reference/blas.profile.stderr | 6 ++++++ benchmark/test/reference/conversion.profile.stderr | 8 ++++++++ .../test/reference/distributed_solver.profile.stderr | 2 ++ .../reference/multi_vector_distributed.profile.stderr | 6 ++++++ benchmark/test/reference/preconditioner.profile.stderr | 4 ++++ benchmark/test/reference/solver.profile.stderr | 2 ++ benchmark/test/reference/sparse_blas.profile.stderr | 2 ++ benchmark/test/reference/spmv.profile.stderr | 2 ++ benchmark/test/reference/spmv_distributed.profile.stderr | 2 ++ 9 files changed, 34 insertions(+) diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index 529fc16009c..7307fb0ad7e 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -10,8 +10,10 @@ DEBUG: begin n = 100 DEBUG: begin copy DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::copy DEBUG: end dense::copy +DEBUG: end repetition DEBUG: end copy Running blas: axpy DEBUG: begin axpy @@ -21,8 +23,10 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::add_scaled DEBUG: end dense::add_scaled +DEBUG: end repetition DEBUG: end axpy Running blas: scal DEBUG: begin scal @@ -30,7 +34,9 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::scale DEBUG: end dense::scale +DEBUG: end repetition DEBUG: end scal DEBUG: end n = 100 diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index a233579c721..3a4301b13eb 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -11,12 +11,14 @@ DEBUG: end components::aos_to_soa DEBUG: begin stencil(100,7pt) Running conversion: coo-read DEBUG: begin coo-read +DEBUG: begin repetition DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy +DEBUG: end repetition DEBUG: end coo-read Running conversion: coo-csr DEBUG: begin coo-csr @@ -28,6 +30,7 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin components::fill_array DEBUG: end components::fill_array +DEBUG: begin repetition DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy @@ -36,11 +39,13 @@ DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs DEBUG: 
end components::convert_idxs_to_ptrs DEBUG: end copy() +DEBUG: end repetition DEBUG: end coo-csr Running conversion: csr-read DEBUG: begin csr-read DEBUG: begin components::fill_array DEBUG: end components::fill_array +DEBUG: begin repetition DEBUG: begin copy DEBUG: end copy DEBUG: begin copy @@ -49,6 +54,7 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs +DEBUG: end repetition DEBUG: end csr-read Running conversion: csr-coo DEBUG: begin csr-coo @@ -62,6 +68,7 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin repetition DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy @@ -70,5 +77,6 @@ DEBUG: end copy DEBUG: begin components::convert_ptrs_to_idxs DEBUG: end components::convert_ptrs_to_idxs DEBUG: end copy() +DEBUG: end repetition DEBUG: end csr-coo DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index 4ea20730117..227737e56b3 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -90,6 +90,7 @@ DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() +DEBUG: begin repetition DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy @@ -420,6 +421,7 @@ DEBUG: end check() DEBUG: end check() DEBUG: end iteration DEBUG: end apply() +DEBUG: end repetition DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr index 102330e38f4..85bd138514b 100644 --- a/benchmark/test/reference/multi_vector_distributed.profile.stderr +++ b/benchmark/test/reference/multi_vector_distributed.profile.stderr @@ -46,8 +46,10 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::copy DEBUG: end dense::copy +DEBUG: end repetition DEBUG: end copy Running blas: axpy DEBUG: begin axpy @@ -93,8 +95,10 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::add_scaled DEBUG: end dense::add_scaled +DEBUG: end repetition DEBUG: end axpy Running blas: scal DEBUG: begin scal @@ -120,7 +124,9 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::scale DEBUG: end dense::scale +DEBUG: end repetition DEBUG: end scal DEBUG: end n = 100 diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index 610dfe464ec..e2069c318d2 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -31,13 +31,17 @@ DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() +DEBUG: begin repetition generate DEBUG: begin generate() DEBUG: end generate() +DEBUG: end repetition generate +DEBUG: begin repetition apply DEBUG: begin apply() DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() DEBUG: end apply() +DEBUG: end repetition apply DEBUG: end none DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/solver.profile.stderr 
b/benchmark/test/reference/solver.profile.stderr index 238591eb0c9..5e1e2cdb312 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -34,6 +34,7 @@ DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() +DEBUG: begin repetition DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy @@ -282,6 +283,7 @@ DEBUG: end check() DEBUG: end check() DEBUG: end iteration DEBUG: end apply() +DEBUG: end repetition DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index 60cf41ccbae..fd991de7063 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -15,9 +15,11 @@ DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin stencil(100,7pt) Running sparse_blas: transpose DEBUG: begin transpose +DEBUG: begin repetition DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin csr::transpose DEBUG: end csr::transpose +DEBUG: end repetition DEBUG: end transpose DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 2299614c6c4..1cc24a5f186 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -28,9 +28,11 @@ DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() +DEBUG: begin repetition DEBUG: begin apply() DEBUG: begin coo::spmv DEBUG: end coo::spmv DEBUG: end apply() +DEBUG: end repetition DEBUG: end coo DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr index b44cef7f3f6..f0d28332ef0 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stderr +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -122,6 +122,7 @@ DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() +DEBUG: begin repetition DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather @@ -134,5 +135,6 @@ DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() DEBUG: end apply() +DEBUG: end repetition DEBUG: end csr-csr DEBUG: end stencil(100,7pt,stencil) From b48d0e19c59e718b89b4eb4a38e52fd57de0ea0b Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 22 Aug 2023 17:52:30 +0200 Subject: [PATCH 253/583] update documentation --- ABOUT-LICENSING.md | 105 +++----------------------------------- INSTALL.md | 6 +-- benchmark/CMakeLists.txt | 4 +- dev_tools/scripts/regroup | 2 +- 4 files changed, 14 insertions(+), 103 deletions(-) diff --git a/ABOUT-LICENSING.md b/ABOUT-LICENSING.md index df081e2211b..d6e68911d1a 100644 --- a/ABOUT-LICENSING.md +++ b/ABOUT-LICENSING.md @@ -76,7 +76,7 @@ the following license: When compiling Ginkgo with `-DGINKGO_BUILD_BENCHMARKS=ON` the build system will download, build, and link [gflags](https://github.com/gflags/gflags) and -[RapidJSON](https://github.com/Tencent/rapidjson) with the +[nlohmann-json](https://github.com/nlohmann/json) with the benchmark suites. gtest is available under the following license: > Copyright (c) 2006, Google Inc. @@ -108,110 +108,22 @@ benchmark suites. 
gtest is available under the following license: > (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -RapidJSON is available under the following license (note that Ginkgo's build -system automatically removes the `bin/jsonchecker/` directory which is licensed -under the problematic JSON license): +nlohmann-json is available under the following license: -> Tencent is pleased to support the open source community by making RapidJSON -> available. -> -> Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All -> rights reserved. -> -> If you have downloaded a copy of the RapidJSON binary from Tencent, please -> note that the RapidJSON binary is licensed under the MIT License. If you have -> downloaded a copy of the RapidJSON source code from Tencent, please note that -> RapidJSON source code is licensed under the MIT License, except for the -> third-party components listed below which are subject to different license -> terms. Your integration of RapidJSON into your own projects may require -> compliance with the MIT License, as well as the other licenses applicable to -> the third-party components included within RapidJSON. To avoid the problematic -> JSON license in your own projects, it's sufficient to exclude the -> bin/jsonchecker/ directory, as it's the only code under the JSON license. A -> copy of the MIT License is included in this file. -> -> Other dependencies and licenses: -> -> Open Source Software Licensed Under the BSD License: -> -------------------------------------------------------------------- -> -> The msinttypes r29 -> -> Copyright (c) 2006-2013 Alexander Chemeris -> All rights reserved. -> -> Redistribution and use in source and binary forms, with or without -> modification, are permitted provided that the following conditions are met: -> -> * Redistributions of source code must retain the above copyright notice, this -> list of conditions and the following disclaimer. -> * Redistributions in binary form must reproduce the above copyright notice, -> this list of conditions and the following disclaimer in the documentation -> and/or other materials provided with the distribution. -> * Neither the name of copyright holder nor the names of its contributors may -> be used to endorse or promote products derived from this software without -> specific prior written permission. -> -> THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY -> EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -> WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -> DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY -> DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -> (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -> LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -> ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -> (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -> SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -> -> Open Source Software Licensed Under the JSON License: -> -------------------------------------------------------------------- -> -> json.org -> Copyright (c) 2002 -> JSON.org All Rights Reserved. -> -> JSON_checker -> Copyright (c) 2002 JSON.org -> All Rights Reserved. 
-> -> -> Terms of the JSON License: -> --------------------------------------------------- -> -> Permission is hereby granted, free of charge, to any person obtaining a copy -> of this software and associated documentation files (the "Software"), to deal -> in the Software without restriction, including without limitation the rights -> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -> copies of the Software, and to permit persons to whom the Software is -> furnished to do so, subject to the following conditions: -> -> The above copyright notice and this permission notice shall be included in all -> copies or substantial portions of the Software. -> -> The Software shall be used for Good, not Evil. -> -> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -> SOFTWARE. -> -> -> Terms of the MIT License: -> -------------------------------------------------------------------- -> +> MIT License +> +> Copyright (c) 2013-2022 Niels Lohmann +> > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal > in the Software without restriction, including without limitation the rights > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > copies of the Software, and to permit persons to whom the Software is > furnished to do so, subject to the following conditions: -> + > The above copyright notice and this permission notice shall be included in all > copies or substantial portions of the Software. -> +> > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -220,7 +132,6 @@ under the problematic JSON license): > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > SOFTWARE. - For generating the documentation of Ginkgo, some scripts from the deal.II library are used. You can refer to the `doc/` folder to see which files are a modified version of deal.II's documentation generation scripts. Additionally, diff --git a/INSTALL.md b/INSTALL.md index 5f788ed0e28..b29358d4eb6 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -31,7 +31,7 @@ Ginkgo adds the following additional switches to control what is being built: * `-DGINKGO_FAST_TESTS={ON, OFF}` reduces the input sizes for a few slow tests to speed them up, default is `OFF`. * `-DGINKGO_BUILD_BENCHMARKS={ON, OFF}` builds Ginkgo's benchmarks - (will download gflags and rapidjson), default is `ON`. + (will download gflags and nlohmann-json), default is `ON`. * `-DGINKGO_BUILD_EXAMPLES={ON, OFF}` builds Ginkgo's examples, default is `ON` * `-DGINKGO_BUILD_EXTLIB_EXAMPLE={ON, OFF}` builds the interfacing example with deal.II, default is `OFF`. @@ -205,7 +205,7 @@ packages can be turned off by disabling the relevant options. 
Test](https://github.com/google/googletest); + GINKGO_BUILD_BENCHMARKS=ON: For argument management we use [gflags](https://github.com/gflags/gflags) and for JSON parsing we use - [RapidJSON](https://github.com/Tencent/rapidjson); + [nlohmann-json](https://github.com/nlohmann/json); + GINKGO_DEVEL_TOOLS=ON: [git-cmake-format](https://github.com/gflegar/git-cmake-format) is our CMake helper for code formatting. @@ -224,7 +224,7 @@ packages can be turned off by disabling the relevant options. Ginkgo attempts to use pre-installed versions of these package if they match version requirements using `find_package`. Otherwise, the configuration step will download the files for each of the packages `GTest`, `gflags`, -`RapidJSON` and `hwloc` and build them internally. +`nlohmann-json` and `hwloc` and build them internally. Note that, if the external packages were not installed to the default location, the CMake option `-DCMAKE_PREFIX_PATH=` needs to be set to the diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index e993ee6cf0c..fd04620f595 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -46,7 +46,7 @@ endfunction() # Generates an executable for one precision. Each executable will be linked to -# `ginkgo`, `gflags` and `rapidjson`. +# `ginkgo`, `gflags` and `nlohmann-json`. # Note: This should only be used by `ginkgo_add_typed_benchmark_executables` # # \param name name for the executable to create (including type suffix) @@ -96,7 +96,7 @@ endfunction(ginkgo_add_single_benchmark_executable) # Generates an executable for each supported precision. Each executable will be -# linked to `ginkgo`, `gflags` and `rapidjson`. +# linked to `ginkgo`, `gflags` and `nlohmann-json`. # # \param name base-name for the executable to create # \param use_lib_linops Boolean indicating if linking against hipsparse/cusparse diff --git a/dev_tools/scripts/regroup b/dev_tools/scripts/regroup index 85eade99289..e35bd37efee 100644 --- a/dev_tools/scripts/regroup +++ b/dev_tools/scripts/regroup @@ -1,6 +1,6 @@ IncludeBlocks: Regroup IncludeCategories: - - Regex: '^<(rapidjson|gflags|gtest|papi).*' + - Regex: '^<(nlohmann|gflags|gtest|papi).*' Priority: 3 - Regex: '^<(omp|cu|hip|thrust|CL/|cooperative|oneapi|mpi|nvToolsExt).*' Priority: 2 From e9436137ff32f8e9950a72a07834f4b166bf80ed Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 23 Aug 2023 13:18:37 +0200 Subject: [PATCH 254/583] review updates - remove unnecessary stdin in tests - simplify validate_config - consistently use pointer members instead of reference members Co-authored-by: Marcel Koch --- benchmark/solver/solver_common.hpp | 6 +--- benchmark/test/blas.py | 2 -- benchmark/test/multi_vector_distributed.py | 2 -- benchmark/utils/loggers.hpp | 34 +++++++++++----------- 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index 597ab76729a..0248ab8e757 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -414,11 +414,7 @@ struct SolverBenchmark : Benchmark> { bool validate_config(const json& value) const override { - return ((value.contains("size") && value.contains("stencil") && - value["size"].is_number_integer() && - value["stencil"].is_string()) || - (value.contains("filename") && - value["filename"].is_string())) && + return generator.validate_config(value) && (value.contains("optimal") && value["optimal"].contains("spmv") && value["optimal"]["spmv"].is_string()); diff --git 
a/benchmark/test/blas.py b/benchmark/test/blas.py index 160d5364e20..ff5bddc5d08 100755 --- a/benchmark/test/blas.py +++ b/benchmark/test/blas.py @@ -22,7 +22,6 @@ ["-input", str(test_framework.sourcepath / "input.blas.json")], expected_stdout="blas.simple.stdout", expected_stderr="blas.simple.stderr", - stdin='[{"n": 100}]', ) # profiler annotations @@ -30,5 +29,4 @@ ["-input", '[{"n": 100}]', "-profile", "-profiler_hook", "debug"], expected_stdout="blas.profile.stdout", expected_stderr="blas.profile.stderr", - stdin='[{"n": 100}]', ) diff --git a/benchmark/test/multi_vector_distributed.py b/benchmark/test/multi_vector_distributed.py index 1e0c4c8adf5..c62cb8ebd17 100644 --- a/benchmark/test/multi_vector_distributed.py +++ b/benchmark/test/multi_vector_distributed.py @@ -24,7 +24,6 @@ ["-input", str(test_framework.sourcepath / "input.blas.json")], expected_stdout="multi_vector_distributed.simple.stdout", expected_stderr="multi_vector_distributed.simple.stderr", - stdin='[{"n": 100}]', num_procs=3, ) @@ -33,6 +32,5 @@ ["-input", '[{"n": 100}]', "-profile", "-profiler_hook", "debug"], expected_stdout="multi_vector_distributed.profile.stdout", expected_stderr="multi_vector_distributed.profile.stderr", - stdin='[{"n": 100}]', num_procs=3, ) diff --git a/benchmark/utils/loggers.hpp b/benchmark/utils/loggers.hpp index 1e651811f0f..89ea6108eda 100644 --- a/benchmark/utils/loggers.hpp +++ b/benchmark/utils/loggers.hpp @@ -179,16 +179,16 @@ struct ResidualLogger : gko::log::Logger { const gko::array* status, bool all_stopped) const override { - timestamps.push_back(std::chrono::duration( - std::chrono::steady_clock::now() - start) - .count()); + timestamps->push_back(std::chrono::duration( + std::chrono::steady_clock::now() - start) + .count()); if (residual_norm) { - rec_res_norms.push_back( + rec_res_norms->push_back( get_norm(gko::as>(residual_norm))); } else { gko::detail::vector_dispatch( residual, [&](const auto v_residual) { - rec_res_norms.push_back(compute_norm2(v_residual)); + rec_res_norms->push_back(compute_norm2(v_residual)); }); } if (solution) { @@ -196,18 +196,18 @@ struct ResidualLogger : gko::log::Logger { rc_vtype>(solution, [&](auto v_solution) { using concrete_type = std::remove_pointer_t>; - true_res_norms.push_back(compute_residual_norm( + true_res_norms->push_back(compute_residual_norm( matrix, gko::as(b), v_solution)); }); } else { - true_res_norms.push_back(-1.0); + true_res_norms->push_back(-1.0); } if (implicit_sq_residual_norm) { - implicit_res_norms.push_back(std::sqrt( + implicit_res_norms->push_back(std::sqrt( get_norm(gko::as>(implicit_sq_residual_norm)))); has_implicit_res_norm = true; } else { - implicit_res_norms.push_back(-1.0); + implicit_res_norms->push_back(-1.0); } } @@ -219,11 +219,11 @@ struct ResidualLogger : gko::log::Logger { matrix{matrix.get()}, b{b.get()}, start{std::chrono::steady_clock::now()}, - rec_res_norms{rec_res_norms}, - true_res_norms{true_res_norms}, + rec_res_norms{&rec_res_norms}, + true_res_norms{&true_res_norms}, has_implicit_res_norm{}, - implicit_res_norms{implicit_res_norms}, - timestamps{timestamps} + implicit_res_norms{&implicit_res_norms}, + timestamps{×tamps} {} bool has_implicit_res_norms() const { return has_implicit_res_norm; } @@ -232,11 +232,11 @@ struct ResidualLogger : gko::log::Logger { const gko::LinOp* matrix; const gko::LinOp* b; std::chrono::steady_clock::time_point start; - json& rec_res_norms; - json& true_res_norms; + json* rec_res_norms; + json* true_res_norms; mutable bool has_implicit_res_norm; - json& 
implicit_res_norms; - json& timestamps; + json* implicit_res_norms; + json* timestamps; }; From 310c686b8bdf4f4ef7595bac8de95131588d8d3c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 29 Aug 2023 19:24:34 +0200 Subject: [PATCH 255/583] review updates - don't install nlohmann-json - simplify code - improve config description formatting Co-authored-by: Yuhsiang M. Tsai --- benchmark/preconditioner/preconditioner.cpp | 3 +-- benchmark/spmv/spmv_common.hpp | 3 --- benchmark/test/CMakeLists.txt | 2 +- benchmark/test/reference/conversion.all.stderr | 2 +- benchmark/test/reference/conversion.profile.stderr | 6 +++--- benchmark/test/reference/conversion.simple.stderr | 2 +- benchmark/test/reference/distributed_solver.profile.stderr | 6 +++--- benchmark/test/reference/distributed_solver.simple.stderr | 2 +- benchmark/test/reference/matrix_statistics.simple.stderr | 2 +- benchmark/test/reference/preconditioner.profile.stderr | 6 +++--- benchmark/test/reference/preconditioner.simple.stderr | 2 +- benchmark/test/reference/solver.profile.stderr | 6 +++--- benchmark/test/reference/solver.simple.stderr | 2 +- benchmark/test/reference/sparse_blas.profile.stderr | 6 +++--- benchmark/test/reference/sparse_blas.simple.stderr | 2 +- benchmark/test/reference/spmv.profile.stderr | 6 +++--- benchmark/test/reference/spmv.simple.stderr | 2 +- benchmark/test/reference/spmv_distributed.profile.stderr | 6 +++--- benchmark/test/reference/spmv_distributed.simple.stderr | 2 +- benchmark/utils/generator.hpp | 6 +++--- third_party/nlohmann_json/CMakeLists.txt | 1 + 21 files changed, 36 insertions(+), 39 deletions(-) diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 98f116f9b12..074fe202e6c 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -205,9 +205,8 @@ struct PreconditionerBenchmark : Benchmark { json& precond_case) const override { auto decoded_precond_name = precond_decoder.at(encoded_precond_name); - precond_case["generate"] = json::object(); - precond_case["apply"] = json::object(); for (auto stage : {"generate", "apply"}) { + precond_case[stage] = json::object(); precond_case[stage]["components"] = json::object(); } diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index f589077834e..c85642bb5f1 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -211,9 +211,6 @@ struct SpmvBenchmark : Benchmark> { std::string best_format; // find the fastest among all formats we tested for (const auto& format : formats) { - if (!test_case[name].contains(format)) { - continue; - } auto& format_case = test_case[name][format]; if (format_case.contains("completed") && format_case["completed"].template get()) { diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt index 1cd589927fa..2f43b6eaf71 100644 --- a/benchmark/test/CMakeLists.txt +++ b/benchmark/test/CMakeLists.txt @@ -25,4 +25,4 @@ if (GINKGO_BUILD_MPI) add_benchmark_test(multi_vector_distributed) add_benchmark_test(spmv_distributed) add_benchmark_test(solver_distributed) -endif() \ No newline at end of file +endif() diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr index 77ff50a1b89..f6f1002e443 100644 --- a/benchmark/test/reference/conversion.all.stderr +++ b/benchmark/test/reference/conversion.all.stderr @@ -4,7 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random 
seed for right hand sides is 42 The formats are coo,csr,ell,sellp,hybrid -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 3a4301b13eb..b25fb4d42ee 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -4,11 +4,11 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin stencil(100,7pt) +DEBUG: begin stencil(100, 7pt) Running conversion: coo-read DEBUG: begin coo-read DEBUG: begin repetition @@ -79,4 +79,4 @@ DEBUG: end components::convert_ptrs_to_idxs DEBUG: end copy() DEBUG: end repetition DEBUG: end csr-coo -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(100, 7pt) diff --git a/benchmark/test/reference/conversion.simple.stderr b/benchmark/test/reference/conversion.simple.stderr index 9b51effac09..53777a4fc53 100644 --- a/benchmark/test/reference/conversion.simple.stderr +++ b/benchmark/test/reference/conversion.simple.stderr @@ -4,7 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index 227737e56b3..e8ef115f8c2 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -5,7 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case stencil(100,7pt,stencil) +Running test case stencil(100, 7pt, stencil) DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size DEBUG: begin components::fill_array @@ -77,7 +77,7 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() Matrix is of size (125, 125) -DEBUG: begin stencil(100,7pt,stencil) +DEBUG: begin stencil(100, 7pt, stencil) Running solver: cg DEBUG: begin cg DEBUG: begin dense::compute_squared_norm2 @@ -445,4 +445,4 @@ DEBUG: end dense::compute_sqrt DEBUG: begin copy DEBUG: end copy DEBUG: end cg -DEBUG: end stencil(100,7pt,stencil) +DEBUG: end stencil(100, 7pt, stencil) diff --git a/benchmark/test/reference/distributed_solver.simple.stderr b/benchmark/test/reference/distributed_solver.simple.stderr index 607081a3949..bdf57c2d0e1 100644 --- a/benchmark/test/reference/distributed_solver.simple.stderr +++ b/benchmark/test/reference/distributed_solver.simple.stderr @@ -5,6 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case stencil(100,7pt,stencil) +Running test case stencil(100, 7pt, 
stencil) Matrix is of size (125, 125) Running solver: cg diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr b/benchmark/test/reference/matrix_statistics.simple.stderr index d02edbc44da..bfaa411873e 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stderr +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -1,4 +1,4 @@ This is Ginkgo 1.7.0 (develop) running with core module 1.7.0 (develop) -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index e2069c318d2..328a738583c 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -4,7 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::aos_to_soa @@ -24,7 +24,7 @@ DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data Matrix is of size (125, 125), 725 -DEBUG: begin stencil(100,7pt) +DEBUG: begin stencil(100, 7pt) Running preconditioner: none DEBUG: begin none DEBUG: begin copy() @@ -44,4 +44,4 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: end repetition apply DEBUG: end none -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(100, 7pt) diff --git a/benchmark/test/reference/preconditioner.simple.stderr b/benchmark/test/reference/preconditioner.simple.stderr index 0090e180d2b..a428671486f 100644 --- a/benchmark/test/reference/preconditioner.simple.stderr +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -4,6 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 Running preconditioner: none diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 5e1e2cdb312..a9846dff61f 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -5,7 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::aos_to_soa @@ -23,7 +23,7 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() Matrix is of size (125, 125) -DEBUG: begin stencil(100,7pt) +DEBUG: begin stencil(100, 7pt) Running solver: cg DEBUG: begin cg DEBUG: begin dense::compute_norm2_dispatch @@ -297,4 +297,4 @@ DEBUG: end dense::compute_norm2_dispatch DEBUG: begin copy DEBUG: end copy DEBUG: end cg -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(100, 7pt) diff --git a/benchmark/test/reference/solver.simple.stderr b/benchmark/test/reference/solver.simple.stderr index 659dd026588..d9c04b69cf5 100644 --- a/benchmark/test/reference/solver.simple.stderr +++ b/benchmark/test/reference/solver.simple.stderr @@ -5,6 +5,6 @@ 
Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125) Running solver: cg diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index fd991de7063..70a9299ccae 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -4,7 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 DEBUG: begin components::fill_array DEBUG: end components::fill_array @@ -12,7 +12,7 @@ DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin stencil(100,7pt) +DEBUG: begin stencil(100, 7pt) Running sparse_blas: transpose DEBUG: begin transpose DEBUG: begin repetition @@ -22,4 +22,4 @@ DEBUG: begin csr::transpose DEBUG: end csr::transpose DEBUG: end repetition DEBUG: end transpose -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(100, 7pt) diff --git a/benchmark/test/reference/sparse_blas.simple.stderr b/benchmark/test/reference/sparse_blas.simple.stderr index 1f2bb34809f..fe6cf23d5b7 100644 --- a/benchmark/test/reference/sparse_blas.simple.stderr +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -4,6 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 Running sparse_blas: transpose diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 1cc24a5f186..3c3ec3b7cfe 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -5,7 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin dense::fill @@ -19,7 +19,7 @@ DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data Matrix is of size (125, 125), 725 -DEBUG: begin stencil(100,7pt) +DEBUG: begin stencil(100, 7pt) Running spmv: coo DEBUG: begin coo DEBUG: begin components::aos_to_soa @@ -35,4 +35,4 @@ DEBUG: end coo::spmv DEBUG: end apply() DEBUG: end repetition DEBUG: end coo -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(100, 7pt) diff --git a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr index 9d5047febb6..97fe670aff7 100644 --- a/benchmark/test/reference/spmv.simple.stderr +++ b/benchmark/test/reference/spmv.simple.stderr @@ -5,6 +5,6 @@ Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 
Running spmv: coo diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr index f0d28332ef0..dc3cfd377c7 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stderr +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -5,7 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are [csr]x[csr] The number of right hand sides is 1 -Running test case stencil(100,7pt,stencil) +Running test case stencil(100, 7pt, stencil) DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size DEBUG: begin components::fill_array @@ -55,7 +55,7 @@ DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data Matrix is of size (81, 81), 144 -DEBUG: begin stencil(100,7pt,stencil) +DEBUG: begin stencil(100, 7pt, stencil) Running spmv: csr-csr DEBUG: begin csr-csr DEBUG: begin partition::build_ranges_from_global_size @@ -137,4 +137,4 @@ DEBUG: end advanced_apply() DEBUG: end apply() DEBUG: end repetition DEBUG: end csr-csr -DEBUG: end stencil(100,7pt,stencil) +DEBUG: end stencil(100, 7pt, stencil) diff --git a/benchmark/test/reference/spmv_distributed.simple.stderr b/benchmark/test/reference/spmv_distributed.simple.stderr index 0df742d5b9b..7c7f6fccf54 100644 --- a/benchmark/test/reference/spmv_distributed.simple.stderr +++ b/benchmark/test/reference/spmv_distributed.simple.stderr @@ -5,6 +5,6 @@ Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are [csr]x[csr] The number of right hand sides is 1 -Running test case stencil(100,7pt,stencil) +Running test case stencil(100, 7pt, stencil) Matrix is of size (81, 81), 144 Running spmv: csr-csr diff --git a/benchmark/utils/generator.hpp b/benchmark/utils/generator.hpp index 257a2384634..3f26ed3f2fc 100644 --- a/benchmark/utils/generator.hpp +++ b/benchmark/utils/generator.hpp @@ -90,7 +90,7 @@ struct DefaultSystemGenerator { return config["filename"].get(); } else if (config.contains("stencil")) { std::stringstream ss; - ss << "stencil(" << config["size"].get() << "," + ss << "stencil(" << config["size"].get() << ", " << config["stencil"].get() << ")"; return ss.str(); } else { @@ -231,8 +231,8 @@ struct DistributedDefaultSystemGenerator { return config["filename"].get(); } else if (config.contains("stencil")) { std::stringstream ss; - ss << "stencil(" << config["size"].get() << "," - << config["stencil"].get() << "," + ss << "stencil(" << config["size"].get() << ", " + << config["stencil"].get() << ", " << config["comm_pattern"].get() << ")"; return ss.str(); } else { diff --git a/third_party/nlohmann_json/CMakeLists.txt b/third_party/nlohmann_json/CMakeLists.txt index b95cfa5606a..6f413e458b9 100644 --- a/third_party/nlohmann_json/CMakeLists.txt +++ b/third_party/nlohmann_json/CMakeLists.txt @@ -6,4 +6,5 @@ FetchContent_Declare( GIT_TAG v3.9.1 ) set(JSON_BuildTests OFF CACHE INTERNAL "") +set(JSON_Install OFF CACHE INTERNAL "") FetchContent_MakeAvailable(nlohmann_json) From bf1ece4bcf34fdd4c534c90e0d0ab77f0572c853 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 30 Aug 2023 11:30:08 +0200 Subject: [PATCH 256/583] keep trailing EOL --- benchmark/test/reference/blas.profile.stdout | 2 +- benchmark/test/reference/blas.simple.stdout | 2 +- benchmark/test/reference/conversion.all.stdout | 2 +- benchmark/test/reference/conversion.matrix.stdout | 2 +- 
benchmark/test/reference/conversion.profile.stdout | 2 +- benchmark/test/reference/conversion.simple.stdout | 2 +- benchmark/test/reference/distributed_solver.matrix.stdout | 2 +- benchmark/test/reference/distributed_solver.profile.stdout | 2 +- benchmark/test/reference/distributed_solver.simple.stdout | 2 +- benchmark/test/reference/matrix_statistics.matrix.stdout | 2 +- benchmark/test/reference/matrix_statistics.simple.stdout | 2 +- .../test/reference/multi_vector_distributed.profile.stdout | 2 +- .../test/reference/multi_vector_distributed.simple.stdout | 2 +- benchmark/test/reference/preconditioner.matrix.stdout | 2 +- benchmark/test/reference/preconditioner.profile.stdout | 2 +- benchmark/test/reference/preconditioner.simple.stdout | 2 +- benchmark/test/reference/solver.matrix.stdout | 2 +- benchmark/test/reference/solver.profile.stdout | 2 +- benchmark/test/reference/solver.simple.stdout | 2 +- benchmark/test/reference/sparse_blas.matrix.stdout | 2 +- benchmark/test/reference/sparse_blas.profile.stdout | 2 +- benchmark/test/reference/sparse_blas.simple.stdout | 2 +- benchmark/test/reference/spmv.matrix.stdout | 2 +- benchmark/test/reference/spmv.profile.stdout | 2 +- benchmark/test/reference/spmv.simple.stdout | 2 +- benchmark/test/reference/spmv_distributed.profile.stdout | 2 +- benchmark/test/reference/spmv_distributed.simple.stdout | 2 +- benchmark/test/test_framework.py.in | 3 ++- 28 files changed, 29 insertions(+), 28 deletions(-) diff --git a/benchmark/test/reference/blas.profile.stdout b/benchmark/test/reference/blas.profile.stdout index 8998d5eaed7..209e115b557 100644 --- a/benchmark/test/reference/blas.profile.stdout +++ b/benchmark/test/reference/blas.profile.stdout @@ -25,4 +25,4 @@ } } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/blas.simple.stdout b/benchmark/test/reference/blas.simple.stdout index a586a9bc57b..54745d81104 100644 --- a/benchmark/test/reference/blas.simple.stdout +++ b/benchmark/test/reference/blas.simple.stdout @@ -25,4 +25,4 @@ } } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/conversion.all.stdout b/benchmark/test/reference/conversion.all.stdout index 0c77d464793..e7a5b8f0f51 100644 --- a/benchmark/test/reference/conversion.all.stdout +++ b/benchmark/test/reference/conversion.all.stdout @@ -73,4 +73,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/conversion.matrix.stdout b/benchmark/test/reference/conversion.matrix.stdout index 7f27b0c25b3..8489e4b30b4 100644 --- a/benchmark/test/reference/conversion.matrix.stdout +++ b/benchmark/test/reference/conversion.matrix.stdout @@ -27,4 +27,4 @@ "cols": 36, "nonzeros": 208 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/conversion.profile.stdout b/benchmark/test/reference/conversion.profile.stdout index a9c3ea674fa..907eac5b951 100644 --- a/benchmark/test/reference/conversion.profile.stdout +++ b/benchmark/test/reference/conversion.profile.stdout @@ -28,4 +28,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/conversion.simple.stdout b/benchmark/test/reference/conversion.simple.stdout index 81c735789d1..91b69b8a248 100644 --- a/benchmark/test/reference/conversion.simple.stdout +++ b/benchmark/test/reference/conversion.simple.stdout @@ -28,4 +28,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/distributed_solver.matrix.stdout 
b/benchmark/test/reference/distributed_solver.matrix.stdout index ec1d258e2f4..67ac333bec5 100644 --- a/benchmark/test/reference/distributed_solver.matrix.stdout +++ b/benchmark/test/reference/distributed_solver.matrix.stdout @@ -54,4 +54,4 @@ "rows": 36, "cols": 36 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/distributed_solver.profile.stdout b/benchmark/test/reference/distributed_solver.profile.stdout index 55dfb1dc428..0a844879c4f 100644 --- a/benchmark/test/reference/distributed_solver.profile.stdout +++ b/benchmark/test/reference/distributed_solver.profile.stdout @@ -30,4 +30,4 @@ "rows": 125, "cols": 125 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/distributed_solver.simple.stdout b/benchmark/test/reference/distributed_solver.simple.stdout index eed8d864388..458115e6ab2 100644 --- a/benchmark/test/reference/distributed_solver.simple.stdout +++ b/benchmark/test/reference/distributed_solver.simple.stdout @@ -56,4 +56,4 @@ "rows": 125, "cols": 125 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/matrix_statistics.matrix.stdout b/benchmark/test/reference/matrix_statistics.matrix.stdout index a6297e89b66..f5eba9461f7 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stdout +++ b/benchmark/test/reference/matrix_statistics.matrix.stdout @@ -36,4 +36,4 @@ "cols": 36, "nonzeros": 208 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/matrix_statistics.simple.stdout b/benchmark/test/reference/matrix_statistics.simple.stdout index 923bbc9f962..23124781a7d 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stdout +++ b/benchmark/test/reference/matrix_statistics.simple.stdout @@ -37,4 +37,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stdout b/benchmark/test/reference/multi_vector_distributed.profile.stdout index 8998d5eaed7..209e115b557 100644 --- a/benchmark/test/reference/multi_vector_distributed.profile.stdout +++ b/benchmark/test/reference/multi_vector_distributed.profile.stdout @@ -25,4 +25,4 @@ } } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stdout b/benchmark/test/reference/multi_vector_distributed.simple.stdout index a586a9bc57b..54745d81104 100644 --- a/benchmark/test/reference/multi_vector_distributed.simple.stdout +++ b/benchmark/test/reference/multi_vector_distributed.simple.stdout @@ -25,4 +25,4 @@ } } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/preconditioner.matrix.stdout b/benchmark/test/reference/preconditioner.matrix.stdout index 51adb7383c3..742ec55c41d 100644 --- a/benchmark/test/reference/preconditioner.matrix.stdout +++ b/benchmark/test/reference/preconditioner.matrix.stdout @@ -28,4 +28,4 @@ "cols": 36, "nonzeros": 208 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/preconditioner.profile.stdout b/benchmark/test/reference/preconditioner.profile.stdout index e33a6502eea..526349b55ad 100644 --- a/benchmark/test/reference/preconditioner.profile.stdout +++ b/benchmark/test/reference/preconditioner.profile.stdout @@ -21,4 +21,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/preconditioner.simple.stdout b/benchmark/test/reference/preconditioner.simple.stdout index 06291228a1c..ed567dcbb13 100644 --- a/benchmark/test/reference/preconditioner.simple.stdout +++ 
b/benchmark/test/reference/preconditioner.simple.stdout @@ -29,4 +29,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/solver.matrix.stdout b/benchmark/test/reference/solver.matrix.stdout index a87e78f7f66..594a3887921 100644 --- a/benchmark/test/reference/solver.matrix.stdout +++ b/benchmark/test/reference/solver.matrix.stdout @@ -52,4 +52,4 @@ "rows": 36, "cols": 36 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/solver.profile.stdout b/benchmark/test/reference/solver.profile.stdout index 906c74de5e7..c132ed1a572 100644 --- a/benchmark/test/reference/solver.profile.stdout +++ b/benchmark/test/reference/solver.profile.stdout @@ -29,4 +29,4 @@ "rows": 125, "cols": 125 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/solver.simple.stdout b/benchmark/test/reference/solver.simple.stdout index 5d127fe4b78..0ee0e4b9a4b 100644 --- a/benchmark/test/reference/solver.simple.stdout +++ b/benchmark/test/reference/solver.simple.stdout @@ -53,4 +53,4 @@ "rows": 125, "cols": 125 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/sparse_blas.matrix.stdout b/benchmark/test/reference/sparse_blas.matrix.stdout index 74fdbf98e7a..a50fa1159d9 100644 --- a/benchmark/test/reference/sparse_blas.matrix.stdout +++ b/benchmark/test/reference/sparse_blas.matrix.stdout @@ -21,4 +21,4 @@ "cols": 36, "nonzeros": 208 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/sparse_blas.profile.stdout b/benchmark/test/reference/sparse_blas.profile.stdout index e9d48fde23d..45cb7e2638a 100644 --- a/benchmark/test/reference/sparse_blas.profile.stdout +++ b/benchmark/test/reference/sparse_blas.profile.stdout @@ -15,4 +15,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/sparse_blas.simple.stdout b/benchmark/test/reference/sparse_blas.simple.stdout index 3cc5f774ebf..a44f4f189b2 100644 --- a/benchmark/test/reference/sparse_blas.simple.stdout +++ b/benchmark/test/reference/sparse_blas.simple.stdout @@ -22,4 +22,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/spmv.matrix.stdout b/benchmark/test/reference/spmv.matrix.stdout index 4d03ce3cd07..ea5927ba148 100644 --- a/benchmark/test/reference/spmv.matrix.stdout +++ b/benchmark/test/reference/spmv.matrix.stdout @@ -17,4 +17,4 @@ "spmv": "coo" } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/spmv.profile.stdout b/benchmark/test/reference/spmv.profile.stdout index 409a92d4e33..6e4701af719 100644 --- a/benchmark/test/reference/spmv.profile.stdout +++ b/benchmark/test/reference/spmv.profile.stdout @@ -17,4 +17,4 @@ "spmv": "coo" } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/spmv.simple.stdout b/benchmark/test/reference/spmv.simple.stdout index 9601a15b331..38f2598c616 100644 --- a/benchmark/test/reference/spmv.simple.stdout +++ b/benchmark/test/reference/spmv.simple.stdout @@ -18,4 +18,4 @@ "spmv": "coo" } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout b/benchmark/test/reference/spmv_distributed.profile.stdout index 8de6a68ae8a..bbef87d0b89 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stdout +++ b/benchmark/test/reference/spmv_distributed.profile.stdout @@ -18,4 +18,4 @@ "spmv": "csr-csr" } } -] \ No newline at end of file +] diff --git 
a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout index f94e4b992a1..77bdef168d3 100644 --- a/benchmark/test/reference/spmv_distributed.simple.stdout +++ b/benchmark/test/reference/spmv_distributed.simple.stdout @@ -19,4 +19,4 @@ "spmv": "csr-csr" } } -] \ No newline at end of file +] diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 1a07818df1f..62c4293e7c0 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -92,7 +92,8 @@ def sanitize_json_text(input: str) -> List[str]: """ result = json.dumps(sanitize_json(json.loads(input)), indent=4) - return result.splitlines() + # json.dumps doesn't add a trailing newline + return result.splitlines() + [""] def sanitize_text( From 39d88071ff0978c7c99817a96444e03e01e4c709 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 12 Jul 2023 10:59:59 +0200 Subject: [PATCH 257/583] add resource_groups property to tests --- cmake/create_test.cmake | 265 +++++++++++++++++++++++------------ hip/test/base/CMakeLists.txt | 2 +- resources.json | 51 +++++++ 3 files changed, 230 insertions(+), 88 deletions(-) create mode 100644 resources.json diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 58a49ca066c..937beb4eb8d 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -1,10 +1,12 @@ -set(gko_test_single_args "MPI_SIZE") +set(gko_test_resource_args "LOCAL_CORES;PERCENT;TYPE") +set(gko_test_single_args "MPI_SIZE;${gko_test_resource_args}") set(gko_test_multi_args "DISABLE_EXECUTORS;ADDITIONAL_LIBRARIES;ADDITIONAL_INCLUDES") +set(gko_test_option_args "NO_RESOURCES") ## Replaces / by _ to create valid target names from relative paths function(ginkgo_build_test_name test_name target_name) file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE) endfunction(ginkgo_build_test_name) @@ -12,8 +14,8 @@ endfunction(ginkgo_build_test_name) function(ginkgo_create_gtest_mpi_main) add_library(gtest_mpi_main "") target_sources(gtest_mpi_main - PRIVATE - ${PROJECT_SOURCE_DIR}/core/test/mpi/gtest/mpi_listener.cpp) + PRIVATE + ${PROJECT_SOURCE_DIR}/core/test/mpi/gtest/mpi_listener.cpp) find_package(MPI 3.1 COMPONENTS CXX REQUIRED) target_link_libraries(gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX) endfunction(ginkgo_create_gtest_mpi_main) @@ -24,33 +26,96 @@ function(ginkgo_set_test_target_properties test_target_name) cmake_parse_arguments(PARSE_ARGV 1 set_properties "" "${gko_test_single_args}" "${gko_test_multi_args}") if (GINKGO_FAST_TESTS) target_compile_definitions(${test_target_name} PRIVATE GINKGO_FAST_TESTS) - endif() + endif () if (GINKGO_TEST_NONDEFAULT_STREAM) target_compile_definitions(${test_target_name} PRIVATE GKO_TEST_NONDEFAULT_STREAM) - endif() + endif () if (GINKGO_COMPILING_DPCPP_TEST AND GINKGO_DPCPP_SINGLE_MODE) target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) - endif() + endif () if (GINKGO_CHECK_CIRCULAR_DEPS) target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") - endif() + endif () if (set_properties_MPI_SIZE) - if(NOT TARGET gtest_mpi_main) + if (NOT TARGET gtest_mpi_main) ginkgo_create_gtest_mpi_main() - endif() + endif () set(gtest_main gtest_mpi_main MPI::MPI_CXX) - else() + else () set(gtest_main 
GTest::Main) - endif() + endif () target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE $<$:${GINKGO_COMPILER_FLAGS}>) target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR} ${set_properties_ADDITIONAL_INCLUDES}) target_link_libraries(${test_target_name} PRIVATE ginkgo ${gtest_main} GTest::GTest ${set_properties_ADDITIONAL_LIBRARIES}) endfunction() +function(ginkgo_add_cpu_resource_requirement_internal test_name local_cores mpi_size) + if (mpi_size) + math(EXPR cores "${mpi_size} * ${local_cores}") + else () + set(cores ${local_cores}) + endif () + set_property(TEST ${test_name} PROPERTY + RESOURCE_GROUPS "cpus:${cores}") +endfunction() + +function(ginkgo_add_resource_requirement test_name) + cmake_parse_arguments(PARSE_ARGV 1 add_rr "${gko_test_option_args}" "${gko_test_single_args}" "") + if(add_rr_NO_RESOURCES) + return() + endif() + + if (NOT add_rr_TYPE) + message(FATAL_ERROR "Need to provide resource type used by test.") + endif () + + if(add_rr_TYPE STREQUAL "ref") + set(single_resource "cpus:1") + elseif(add_rr_TYPE STREQUAL "cpu") + if(NOT add_rr_CORES) + set(add_rr_CORES 4) # perhaps get this from environment variable? + endif() + if(NOT add_rr_CORES MATCHES "^[0-9]+") + message(FATAL_ERROR "Resource specification is invalid: CORE=${add_rr_CORES}") + endif() + + set(single_resource "cpus:${add_rr_CORES}") + elseif(add_rr_TYPE STREQUAL "gpu") + if(NOT add_rr_PERCENTAGE) + set(add_rr_PERCENTAGE 50) + endif() + if(add_rr_MPI_SIZE GREATER 1) + set(add_rr_PERCENTAGE 100) + endif() + if(NOT add_rr_PERCENTAGE MATCHES "^[0-9]([0-9][0-9]?)?" + OR add_rr_PERCENTAGE LESS 0 + OR add_rr_PERCENTAGE GREATER 100) + message(FATAL_ERROR "Resource specification is invalid: PERCENTAGE=${add_rr_PERCENTAGE}") + endif() + + set(single_resource "gpus:${add_rr_PERCENTAGE}") + else() + message(FATAL_ERROR "Unrecognized resource type ${add_rr_TYPE}, allowed are: ref, cpu, gpu.") + endif() + + if(NOT add_rr_MPI_SIZE) + set(add_rr_MPI_SIZE 1) + endif() + foreach(unused RANGE ${MPI_SIZE}) + list(APPEND resources "${single_resource}") + endforeach() + set_property(TEST ${test_name} + PROPERTY + RESOURCE_GROUPS ${resources}) +endfunction() + + ## Adds a test to the list executed by ctest and sets its output binary name ## Possible additional arguments: ## - `MPI_SIZE size` causes the tests to be run with `size` MPI processes. 
+## - `CORES` the number of threads used by a test, default is 4 +## - `PERCENTAGE` usage percentage of a single GPU, default is 50 ## - `DISABLE_EXECUTORS exec1 exec2` disables the test for certain backends (if built for multiple) ## - `ADDITIONAL_LIBRARIES lib1 lib2` adds additional target link dependencies ## - `ADDITIONAL_INCLUDES path1 path2` adds additional target include paths @@ -60,36 +125,39 @@ function(ginkgo_add_test test_name test_target_name) set_target_properties(${test_target_name} PROPERTIES OUTPUT_NAME ${test_name}) if (add_test_MPI_SIZE) add_test(NAME ${REL_BINARY_DIR}/${test_name} - COMMAND - ${MPIEXEC_EXECUTABLE} - ${MPIEXEC_NUMPROC_FLAG} - ${add_test_MPI_SIZE} - "$" - WORKING_DIRECTORY "$") - else() + COMMAND + ${MPIEXEC_EXECUTABLE} + ${MPIEXEC_NUMPROC_FLAG} + ${add_test_MPI_SIZE} + "$" + WORKING_DIRECTORY "$") + else () add_test(NAME ${REL_BINARY_DIR}/${test_name} - COMMAND ${test_target_name} - WORKING_DIRECTORY "$") - endif() + COMMAND ${test_target_name} + WORKING_DIRECTORY "$") + endif () + + ginkgo_add_resource_requirement(${REL_BINARY_DIR}/${test_name} ${ARGN}) + set(test_preload) if (GINKGO_TEST_NONDEFAULT_STREAM AND GINKGO_BUILD_CUDA) set(test_preload $:${test_preload}) - endif() + endif () if (GINKGO_TEST_NONDEFAULT_STREAM AND GINKGO_BUILD_HIP AND GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") set(test_preload $:${test_preload}) - endif() - if(test_preload) + endif () + if (test_preload) set_tests_properties(${REL_BINARY_DIR}/${test_name} PROPERTIES ENVIRONMENT LD_PRELOAD=${test_preload}) - endif() + endif () endfunction() ## Normal test function(ginkgo_create_test test_name) ginkgo_build_test_name(${test_name} test_target_name) add_executable(${test_target_name} ${test_name}.cpp) - target_link_libraries(${test_target_name} PRIVATE ${create_test_ADDITIONAL_LIBRARIES}) + target_link_libraries(${test_target_name}) ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN}) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} TYPE ref) endfunction(ginkgo_create_test) ## Test compiled with dpcpp @@ -100,11 +168,11 @@ function(ginkgo_create_dpcpp_test test_name) target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS}) target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN}) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} TYPE gpu) # Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test. 
if (MKL_ENV) set_tests_properties(${test_target_name} PROPERTIES ENVIRONMENT "${MKL_ENV}") - endif() + endif () endfunction(ginkgo_create_dpcpp_test) ## Test compiled with CUDA @@ -118,23 +186,23 @@ function(ginkgo_create_cuda_test_internal test_name filename test_target_name) add_executable(${test_target_name} ${filename}) target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_CUDA) target_compile_options(${test_target_name} - PRIVATE + PRIVATE $<$:${GINKGO_CUDA_COMPILER_FLAGS}>) - if(MSVC) + if (MSVC) target_compile_options(${test_target_name} - PRIVATE + PRIVATE $<$:--extended-lambda --expt-relaxed-constexpr>) - elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + elseif (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") target_compile_options(${test_target_name} - PRIVATE + PRIVATE $<$:--expt-extended-lambda --expt-relaxed-constexpr>) - endif() + endif () # we handle CUDA architecture flags for now, disable CMake handling - if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) + if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF) - endif() + endif () ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN}) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} TYPE gpu) endfunction(ginkgo_create_cuda_test_internal) ## Test compiled with HIP @@ -149,71 +217,94 @@ function(ginkgo_create_hip_test_internal test_name filename test_target_name add set(GINKGO_TEST_HIP_DEFINES -DGKO_COMPILING_HIP ${additional_flags}) if (GINKGO_FAST_TESTS) list(APPEND GINKGO_TEST_HIP_DEFINES -DGINKGO_FAST_TESTS) - endif() + endif () if (GINKGO_TEST_NONDEFAULT_STREAM) list(APPEND GINKGO_TEST_HIP_DEFINES -DGKO_TEST_NONDEFAULT_STREAM) - endif() + endif () # NOTE: With how HIP works, passing the flags `HIPCC_OPTIONS` etc. here # creates a redefinition of all flags. This creates some issues with `nvcc`, # but `clang` seems fine with the redefinitions. if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") hip_add_executable(${test_target_name} ${filename} - # If `FindHIP.cmake`, namely `HIP_PARSE_HIPCC_OPTIONS` macro and - # call gets fixed, uncomment this. - HIPCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} # ${GINKGO_HIPCC_OPTIONS} - # NVCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_NVCC_OPTIONS} - # CLANG_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_CLANG_OPTIONS} - --expt-relaxed-constexpr --expt-extended-lambda - ) - else() # hcc/clang + # If `FindHIP.cmake`, namely `HIP_PARSE_HIPCC_OPTIONS` macro and + # call gets fixed, uncomment this. + HIPCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} # ${GINKGO_HIPCC_OPTIONS} + # NVCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_NVCC_OPTIONS} + # CLANG_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_CLANG_OPTIONS} + --expt-relaxed-constexpr --expt-extended-lambda + ) + else () # hcc/clang hip_add_executable(${test_target_name} ${filename} - HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} ${GINKGO_TEST_HIP_DEFINES} - NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} - CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} - ) - endif() + HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} ${GINKGO_TEST_HIP_DEFINES} + NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} + CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} + ) + endif () # Let's use a normal compiler for linking set_target_properties(${test_target_name} PROPERTIES LINKER_LANGUAGE CXX) target_include_directories(${test_target_name} - PRIVATE - # Only `math` requires it so far, but it's much easier - # to put these this way. 
- ${GINKGO_HIP_THRUST_PATH} - # Only `exception_helpers` requires these so far, but it's much easier - # to put these this way. - ${HIPBLAS_INCLUDE_DIRS} - ${HIPFFT_INCLUDE_DIRS} - ${hiprand_INCLUDE_DIRS} - ${HIPSPARSE_INCLUDE_DIRS} - ) + PRIVATE + # Only `math` requires it so far, but it's much easier + # to put these this way. + ${GINKGO_HIP_THRUST_PATH} + # Only `exception_helpers` requires these so far, but it's much easier + # to put these this way. + ${HIPBLAS_INCLUDE_DIRS} + ${HIPFFT_INCLUDE_DIRS} + ${hiprand_INCLUDE_DIRS} + ${HIPSPARSE_INCLUDE_DIRS} + ) ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN}) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} TYPE gpu) endfunction(ginkgo_create_hip_test_internal) + +## Test compiled with OpenMP +function(ginkgo_create_omp_test test_name) + ginkgo_build_test_name(${test_name} test_target_name) + ginkgo_create_omp_test_internal(${test_name} ${test_name}.cpp ${test_target_name} "" ${ARGN}) +endfunction() + +function(ginkgo_create_omp_test_internal test_name filename test_target_name) + ginkgo_build_test_name(${test_name} test_target_name) + add_executable(${test_target_name} ${test_name}.cpp) + target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_OMP) + target_link_libraries(${test_target_name} PRIVATE OpenMP::OpenMP_CXX) + ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} TYPE cpu) +endfunction() + ## Common test compiled with the host compiler, one target for each enabled backend function(ginkgo_create_common_test test_name) - if(GINKGO_BUILD_OMP) + if (GINKGO_BUILD_OMP) ginkgo_create_common_test_internal(${test_name} OmpExecutor omp ${ARGN}) - endif() - if(GINKGO_BUILD_HIP) + endif () + if (GINKGO_BUILD_HIP) ginkgo_create_common_test_internal(${test_name} HipExecutor hip ${ARGN}) - endif() - if(GINKGO_BUILD_CUDA) + endif () + if (GINKGO_BUILD_CUDA) ginkgo_create_common_test_internal(${test_name} CudaExecutor cuda ${ARGN}) - endif() - if(GINKGO_BUILD_DPCPP) + endif () + if (GINKGO_BUILD_DPCPP) ginkgo_create_common_test_internal(${test_name} DpcppExecutor dpcpp ${ARGN}) - endif() + endif () endfunction(ginkgo_create_common_test) function(ginkgo_create_common_test_internal test_name exec_type exec) cmake_parse_arguments(PARSE_ARGV 3 common_test "" "${gko_test_single_args}" "${gko_test_multi_args}") - if(exec IN_LIST common_test_DISABLE_EXECUTORS) + if (exec IN_LIST common_test_DISABLE_EXECUTORS) return() - endif() + endif () + if (exec STREQUAL reference) + set(test_resource_type ref) + elseif (exec STREQUAL omp) + set(test_resource_type cpu) + else () + set(test_resource_type gpu) + endif () ginkgo_build_test_name(${test_name} test_target_name) string(TOUPPER ${exec} exec_upper) # set up actual test @@ -222,39 +313,39 @@ function(ginkgo_create_common_test_internal test_name exec_type exec) target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=${exec_type} EXEC_NAMESPACE=${exec} GKO_COMPILING_${exec_upper}) target_link_libraries(${test_target_name} PRIVATE ${common_test_ADDITIONAL_LIBRARIES}) # use float for DPC++ if necessary - if((exec STREQUAL "dpcpp") AND GINKGO_DPCPP_SINGLE_MODE) + if ((exec STREQUAL "dpcpp") AND GINKGO_DPCPP_SINGLE_MODE) target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1) target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) - endif() + endif () 
ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name}_${exec} ${test_target_name} ${ARGN}) + ginkgo_add_test(${test_name}_${exec} ${test_target_name} ${ARGN} TYPE ${test_resource_type}) endfunction(ginkgo_create_common_test_internal) ## Common test compiled with the device compiler, one target for each enabled backend function(ginkgo_create_common_device_test test_name) cmake_parse_arguments(PARSE_ARGV 1 common_device_test "" "${gko_test_single_args}" "${gko_test_multi_args}") ginkgo_build_test_name(${test_name} test_target_name) - if(GINKGO_BUILD_DPCPP) + if (GINKGO_BUILD_DPCPP) ginkgo_create_common_test_internal(${test_name} DpcppExecutor dpcpp ${ARGN}) target_compile_features(${test_target_name}_dpcpp PRIVATE cxx_std_17) target_compile_options(${test_target_name}_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS}) target_link_options(${test_target_name}_dpcpp PRIVATE -fsycl-device-lib=all -fsycl-device-code-split=per_kernel) - endif() - if(GINKGO_BUILD_OMP) + endif () + if (GINKGO_BUILD_OMP) ginkgo_create_common_test_internal(${test_name} OmpExecutor omp ${ARGN}) target_link_libraries(${test_target_name}_omp PUBLIC OpenMP::OpenMP_CXX) - endif() - if(GINKGO_BUILD_CUDA) + endif () + if (GINKGO_BUILD_CUDA) # need to make a separate file for this, since we can't set conflicting properties on the same file configure_file(${test_name}.cpp ${test_name}.cu COPYONLY) ginkgo_create_cuda_test_internal(${test_name}_cuda ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.cu ${test_target_name}_cuda ${ARGN}) target_compile_definitions(${test_target_name}_cuda PRIVATE EXEC_TYPE=CudaExecutor EXEC_NAMESPACE=cuda) - endif() - if(GINKGO_BUILD_HIP) + endif () + if (GINKGO_BUILD_HIP) # need to make a separate file for this, since we can't set conflicting properties on the same file configure_file(${test_name}.cpp ${test_name}.hip.cpp COPYONLY) ginkgo_create_hip_test_internal(${test_name}_hip ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.hip.cpp ${test_target_name}_hip "-std=c++14;-DEXEC_TYPE=HipExecutor;-DEXEC_NAMESPACE=hip" ${ARGN}) - endif() + endif () endfunction(ginkgo_create_common_device_test) ## Common test compiled with the host compiler for all enabled backends and Reference diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index 486fca294c2..ed32ab5b6a7 100644 --- a/hip/test/base/CMakeLists.txt +++ b/hip/test/base/CMakeLists.txt @@ -15,4 +15,4 @@ if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") else() ginkgo_create_hip_test(exception_helpers) endif() -ginkgo_create_hip_test(scoped_device_id) +ginkgo_create_hip_test(scoped_device_id NO_RESOURCES) diff --git a/resources.json b/resources.json new file mode 100644 index 00000000000..9d69ada752b --- /dev/null +++ b/resources.json @@ -0,0 +1,51 @@ +{ + "version": { + "major": 1, + "minor": 0 + }, + "local": [ + { + "cpus": [ + { + "id": "0", + "slots": 32 + } + ], + + "gpus": [ + { + "id": "0", + "slots": 100 + }, + { + "id": "1", + "slots": 100 + }, + { + "id": "2", + "slots": 100 + }, + { + "id": "3", + "slots": 100 + }, + { + "id": "4", + "slots": 100 + }, + { + "id": "5", + "slots": 100 + }, + { + "id": "6", + "slots": 100 + }, + { + "id": "7", + "slots": 100 + } + ] + } + ] +} \ No newline at end of file From a6c247378bff944b92baf23de229a338af73bc76 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 1 Aug 2023 17:52:21 +0200 Subject: [PATCH 258/583] add custom gtest main files --- cmake/create_test.cmake | 57 +++++++++++-------- core/test/gtest/environments.hpp | 40 +++++++++++++ 
core/test/gtest/ginkgo_main.cpp | 14 +++++ .../ginkgo_mpi_main.cpp} | 5 ++ cuda/test/utils.hpp | 9 --- hip/test/utils.hip.hpp | 9 --- test/utils/executor.hpp | 33 ----------- 7 files changed, 93 insertions(+), 74 deletions(-) create mode 100644 core/test/gtest/environments.hpp create mode 100644 core/test/gtest/ginkgo_main.cpp rename core/test/{mpi/gtest/mpi_listener.cpp => gtest/ginkgo_mpi_main.cpp} (98%) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 937beb4eb8d..e6ebc6523a2 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -5,20 +5,28 @@ set(gko_test_option_args "NO_RESOURCES") ## Replaces / by _ to create valid target names from relative paths function(ginkgo_build_test_name test_name target_name) - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") - set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE) -endfunction(ginkgo_build_test_name) + file(RELATIVE_PATH REL_BINARY_DIR + ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") + set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE) +endfunction() + +function(ginkgo_create_gtest_main) + add_library(ginkgo_gtest_main "") + target_sources(ginkgo_gtest_main + PRIVATE + ${PROJECT_SOURCE_DIR}/core/test/gtest/ginkgo_main.cpp) + target_link_libraries(ginkgo_gtest_main PRIVATE GTest::GTest Ginkgo::ginkgo) +endfunction() function(ginkgo_create_gtest_mpi_main) - add_library(gtest_mpi_main "") - target_sources(gtest_mpi_main - PRIVATE - ${PROJECT_SOURCE_DIR}/core/test/mpi/gtest/mpi_listener.cpp) - find_package(MPI 3.1 COMPONENTS CXX REQUIRED) - target_link_libraries(gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX) -endfunction(ginkgo_create_gtest_mpi_main) + add_library(ginkgo_gtest_mpi_main "") + target_sources(ginkgo_gtest_mpi_main + PRIVATE + ${PROJECT_SOURCE_DIR}/core/test/gtest/ginkgo_mpi_main.cpp) + find_package(MPI 3.1 COMPONENTS CXX REQUIRED) + target_link_libraries(ginkgo_gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX Ginkgo::ginkgo) +endfunction() ## Set up shared target properties and handle ADDITIONAL_LIBRARIES/ADDITIONAL_INCLUDES ## `MPI_SIZE size` causes the tests to be run with `size` MPI processes. 
@@ -33,17 +41,20 @@ function(ginkgo_set_test_target_properties test_target_name) if (GINKGO_COMPILING_DPCPP_TEST AND GINKGO_DPCPP_SINGLE_MODE) target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) endif () - if (GINKGO_CHECK_CIRCULAR_DEPS) - target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") - endif () - if (set_properties_MPI_SIZE) - if (NOT TARGET gtest_mpi_main) - ginkgo_create_gtest_mpi_main() - endif () - set(gtest_main gtest_mpi_main MPI::MPI_CXX) - else () - set(gtest_main GTest::Main) - endif () + if(GINKGO_CHECK_CIRCULAR_DEPS) + target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") + endif() + if(set_properties_MPI_SIZE) + if(NOT TARGET ginkgo_gtest_mpi_main) + ginkgo_create_gtest_mpi_main() + endif() + set(gtest_main ginkgo_gtest_mpi_main MPI::MPI_CXX) + else() + if(NOT TARGET ginkgo_gtest_main) + ginkgo_create_gtest_main() + endif() + set(gtest_main ginkgo_gtest_main) + endif() target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE $<$:${GINKGO_COMPILER_FLAGS}>) target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR} ${set_properties_ADDITIONAL_INCLUDES}) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp new file mode 100644 index 00000000000..b248829bdfb --- /dev/null +++ b/core/test/gtest/environments.hpp @@ -0,0 +1,40 @@ +#ifndef GINKGO_ENVIRONMENTS_HPP +#define GINKGO_ENVIRONMENTS_HPP + + +#include + + +#ifdef GKO_COMPILING_CUDA + +#include "cuda/base/device.hpp" + +class CudaEnvironment : public ::testing::Environment { +public: + void TearDown() override { gko::kernels::cuda::reset_device(0); } +}; + +#else + +class CudaEnvironment : public ::testing::Environment {}; + +#endif + + +#ifdef GKO_COMPILING_HIP + +#include "hip/base/device.hpp" + +class HipEnvironment : public ::testing::Environment { +public: + void TearDown() override { gko::kernels::hip::reset_device(0); } +}; + +#else + +class HipEnvironment : public ::testing::Environment {}; + +#endif + + +#endif // GINKGO_ENVIRONMENTS_HPP diff --git a/core/test/gtest/ginkgo_main.cpp b/core/test/gtest/ginkgo_main.cpp new file mode 100644 index 00000000000..c284db84794 --- /dev/null +++ b/core/test/gtest/ginkgo_main.cpp @@ -0,0 +1,14 @@ +#include + + +#include "core/test/gtest/environments.hpp" + + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + ::testing::AddGlobalTestEnvironment(new CudaEnvironment); + ::testing::AddGlobalTestEnvironment(new HipEnvironment); + int result = RUN_ALL_TESTS(); + return result; +} \ No newline at end of file diff --git a/core/test/mpi/gtest/mpi_listener.cpp b/core/test/gtest/ginkgo_mpi_main.cpp similarity index 98% rename from core/test/mpi/gtest/mpi_listener.cpp rename to core/test/gtest/ginkgo_mpi_main.cpp index 66c9e6cd319..6c9b1b248f3 100644 --- a/core/test/mpi/gtest/mpi_listener.cpp +++ b/core/test/gtest/ginkgo_mpi_main.cpp @@ -51,6 +51,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/test/gtest/environments.hpp" + + namespace GTestMPIListener { // This class sets up the global test environment, which is needed @@ -378,6 +381,8 @@ int main(int argc, char** argv) ::testing::InitGoogleTest(&argc, argv); MPI_Init(&argc, &argv); ::testing::AddGlobalTestEnvironment(new GTestMPIListener::MPIEnvironment); + ::testing::AddGlobalTestEnvironment(new CudaEnvironment); + ::testing::AddGlobalTestEnvironment(new HipEnvironment); ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); ::testing::TestEventListener* l = diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp index e1156b91903..58d310024bd 100644 --- a/cuda/test/utils.hpp +++ b/cuda/test/utils.hpp @@ -47,15 +47,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { -class CudaEnvironment : public ::testing::Environment { -public: - void TearDown() override { gko::kernels::cuda::reset_device(0); } -}; - -testing::Environment* cuda_env = - testing::AddGlobalTestEnvironment(new CudaEnvironment); - - class CudaTestFixture : public ::testing::Test { protected: CudaTestFixture() diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp index bf7073cf9a1..dcecc8d2522 100644 --- a/hip/test/utils.hip.hpp +++ b/hip/test/utils.hip.hpp @@ -47,15 +47,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { -class HipEnvironment : public ::testing::Environment { -public: - void TearDown() override { gko::kernels::hip::reset_device(0); } -}; - -testing::Environment* hip_env = - testing::AddGlobalTestEnvironment(new HipEnvironment); - - class HipTestFixture : public ::testing::Test { protected: HipTestFixture() diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 200f4652644..ca6ad2a75c9 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -44,39 +44,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include - - -#ifdef GKO_COMPILING_CUDA - -#include "cuda/base/device.hpp" - -class CudaEnvironment : public ::testing::Environment { -public: - void TearDown() override { gko::kernels::cuda::reset_device(0); } -}; - -testing::Environment* cuda_env = - testing::AddGlobalTestEnvironment(new CudaEnvironment); - -#endif - - -#ifdef GKO_COMPILING_HIP - -#include "hip/base/device.hpp" - -class HipEnvironment : public ::testing::Environment { -public: - void TearDown() override { gko::kernels::hip::reset_device(0); } -}; - -testing::Environment* hip_env = - testing::AddGlobalTestEnvironment(new HipEnvironment); - -#endif - - #if GINKGO_COMMON_SINGLE_MODE #define SKIP_IF_SINGLE_MODE GTEST_SKIP() << "Skip due to single mode" #else From af2ab0cf676051fb871870ad324bb63a5cb71457 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 12 Jul 2023 17:15:45 +0200 Subject: [PATCH 259/583] use resources in tests --- core/test/gtest/environments.hpp | 109 +++++++++++++++++++++++++++- core/test/gtest/ginkgo_main.cpp | 7 ++ core/test/gtest/ginkgo_mpi_main.cpp | 23 ++++-- test/utils/executor.hpp | 16 ++-- test/utils/mpi/executor.hpp | 73 +------------------ 5 files changed, 145 insertions(+), 83 deletions(-) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index b248829bdfb..3f93ea95b8a 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -1,17 +1,119 @@ #ifndef GINKGO_ENVIRONMENTS_HPP #define GINKGO_ENVIRONMENTS_HPP +#include +#include + #include +std::vector split(const std::string& s, char delimiter = ',') +{ + std::istringstream iss(s); + std::vector tokens; + std::string token; + while (std::getline(iss, token, delimiter)) { + tokens.push_back(token); + } + return tokens; +} + + +struct resource { + int id; + int slots; +}; + +resource parse_single_resource(const std::string& resource_string) +{ + std::regex re(R"(id\:(\d+),slots\:(\d+))"); + std::smatch match; + + if (!std::regex_match(resource_string, match, re)) { + GKO_INVALID_STATE("Can't parse resource string: " + resource_string); + } + + return resource{std::stoi(match[1]), std::stoi(match[2])}; +} + +std::vector parse_all_resources(const std::string& resource_string) +{ + auto resource_strings = split(resource_string, ';'); + + std::vector resources; + for (const auto& rs : resource_strings) { + resources.push_back(parse_single_resource(rs)); + } + return resources; +} + + +std::vector get_ctest_resources() +{ + auto rs_count_env = std::getenv("CTEST_RESOURCE_GROUP_COUNT"); + + if (!rs_count_env) { + return {{0, 1}}; + } + + auto rs_count = std::stoi(rs_count_env); + + if (rs_count > 1) { + GKO_INVALID_STATE("Can handle only one resource group."); + } + + std::string rs_type = std::getenv("CTEST_RESOURCE_GROUP_0"); + std::transform(rs_type.begin(), rs_type.end(), rs_type.begin(), + [](auto c) { return std::toupper(c); }); + std::string rs_env = + std::getenv(std::string("CTEST_RESOURCE_GROUP_0_" + rs_type).c_str()); + std::cerr << rs_env << std::endl; + return parse_all_resources(rs_env); +} + + +class ResourceEnvironment : public ::testing::Environment { +public: + explicit ResourceEnvironment(resource rs_) : ::testing::Environment() + { + rs = rs_; + } + + static resource rs; +}; + + +#ifdef GKO_COMPILING_OMP + +#include + +class OmpEnvironment : public ::testing::Environment { +public: + void SetUp() override + { + omp_set_num_threads(ResourceEnvironment::rs.slots); + } +}; + +#else + + +class OmpEnvironment : public ::testing::Environment {}; + +#endif + + #ifdef 
GKO_COMPILING_CUDA #include "cuda/base/device.hpp" class CudaEnvironment : public ::testing::Environment { public: - void TearDown() override { gko::kernels::cuda::reset_device(0); } + void TearDown() override + { + gko::kernels::cuda::reset_device(ResourceEnvironment::rs.id); + } }; #else @@ -27,7 +129,10 @@ class CudaEnvironment : public ::testing::Environment {}; class HipEnvironment : public ::testing::Environment { public: - void TearDown() override { gko::kernels::hip::reset_device(0); } + void TearDown() override + { + gko::kernels::hip::reset_device(ResourceEnvironment::rs.id); + } }; #else diff --git a/core/test/gtest/ginkgo_main.cpp b/core/test/gtest/ginkgo_main.cpp index c284db84794..76a005a66e2 100644 --- a/core/test/gtest/ginkgo_main.cpp +++ b/core/test/gtest/ginkgo_main.cpp @@ -3,12 +3,19 @@ #include "core/test/gtest/environments.hpp" +resource ResourceEnvironment::rs = {}; int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); + + auto resources = get_ctest_resources(); + + ::testing::AddGlobalTestEnvironment( + new ResourceEnvironment(resources.front())); ::testing::AddGlobalTestEnvironment(new CudaEnvironment); ::testing::AddGlobalTestEnvironment(new HipEnvironment); + ::testing::AddGlobalTestEnvironment(new OmpEnvironment); int result = RUN_ALL_TESTS(); return result; } \ No newline at end of file diff --git a/core/test/gtest/ginkgo_mpi_main.cpp b/core/test/gtest/ginkgo_mpi_main.cpp index 6c9b1b248f3..934a3dcd3f5 100644 --- a/core/test/gtest/ginkgo_mpi_main.cpp +++ b/core/test/gtest/ginkgo_mpi_main.cpp @@ -45,10 +45,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include +#include -#include +#include #include "core/test/gtest/environments.hpp" @@ -95,7 +95,6 @@ class MPIEnvironment : public ::testing::Environment { private: // Disallow copying MPIEnvironment(const MPIEnvironment& env) {} - }; // class MPIEnvironment @@ -376,19 +375,31 @@ class MPIWrapperPrinter : public ::testing::TestEventListener { } // namespace GTestMPIListener +resource ResourceEnvironment::rs = {}; + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); + MPI_Init(&argc, &argv); - ::testing::AddGlobalTestEnvironment(new GTestMPIListener::MPIEnvironment); + MPI_Comm comm(MPI_COMM_WORLD); + int rank; + MPI_Comm_rank(comm, &rank); + + auto resources = get_ctest_resources(); + + testing::AddGlobalTestEnvironment(new GTestMPIListener::MPIEnvironment); + ::testing::AddGlobalTestEnvironment( + new ResourceEnvironment(resources[rank])); ::testing::AddGlobalTestEnvironment(new CudaEnvironment); ::testing::AddGlobalTestEnvironment(new HipEnvironment); + ::testing::AddGlobalTestEnvironment(new OmpEnvironment); + ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); ::testing::TestEventListener* l = listeners.Release(listeners.default_result_printer()); - listeners.Append( - new GTestMPIListener::MPIWrapperPrinter(l, MPI_COMM_WORLD)); + listeners.Append(new GTestMPIListener::MPIWrapperPrinter(l, comm)); int result = RUN_ALL_TESTS(); return result; } diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index ca6ad2a75c9..ad4621d5c31 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -44,6 +44,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
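// [Editor's note, illustrative values] get_ctest_resources() above consumes the
// environment that CTest exports when a test carries a RESOURCE_GROUPS property
// and ctest runs with a --resource-spec-file. For a single group of type "gpus"
// the variables look roughly like this (ids and slots depend on the spec file):
//
//     CTEST_RESOURCE_GROUP_COUNT=1
//     CTEST_RESOURCE_GROUP_0=gpus
//     CTEST_RESOURCE_GROUP_0_GPUS=id:0,slots:50
//
// which is what the R"(id\:(\d+),slots\:(\d+))" regex in parse_single_resource()
// extracts; without these variables the code falls back to {id = 0, slots = 1}.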
#include +#include "core/test/gtest/environments.hpp" + + #if GINKGO_COMMON_SINGLE_MODE #define SKIP_IF_SINGLE_MODE GTEST_SKIP() << "Skip due to single mode" #else @@ -77,7 +80,7 @@ inline void init_executor(std::shared_ptr ref, throw std::runtime_error{"No suitable CUDA devices"}; } exec = gko::CudaExecutor::create( - 0, ref, std::make_shared(), stream); + ResourceEnvironment::rs.id, ref, std::make_shared(), stream); } } @@ -90,7 +93,8 @@ inline void init_executor(std::shared_ptr ref, throw std::runtime_error{"No suitable HIP devices"}; } exec = gko::HipExecutor::create( - 0, ref, std::make_shared(), stream); + ResourceEnvironment::rs.id, ref, std::make_shared< + gko::HipAllocator>(), stream); } @@ -98,9 +102,11 @@ inline void init_executor(std::shared_ptr ref, std::shared_ptr& exec) { if (gko::DpcppExecutor::get_num_devices("gpu") > 0) { - exec = gko::DpcppExecutor::create(0, ref, "gpu"); + exec = + gko::DpcppExecutor::create(ResourceEnvironment::rs.id, ref, "gpu"); } else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) { - exec = gko::DpcppExecutor::create(0, ref, "cpu"); + exec = + gko::DpcppExecutor::create(ResourceEnvironment::rs.id, ref, "cpu"); } else { throw std::runtime_error{"No suitable DPC++ devices"}; } @@ -120,7 +126,7 @@ class CommonTestFixture : public ::testing::Test { : #if defined(GKO_TEST_NONDEFAULT_STREAM) && \ (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)) - stream{0}, + stream(ResourceEnvironment::rs.id), #endif ref{gko::ReferenceExecutor::create()} { diff --git a/test/utils/mpi/executor.hpp b/test/utils/mpi/executor.hpp index d8c94e01804..4eba5593c90 100644 --- a/test/utils/mpi/executor.hpp +++ b/test/utils/mpi/executor.hpp @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -43,73 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include -#include - - -inline void init_executor(std::shared_ptr, - std::shared_ptr& exec) -{ - exec = gko::ReferenceExecutor::create(); -} - - -inline void init_executor(std::shared_ptr, - std::shared_ptr& exec) -{ - exec = gko::OmpExecutor::create(); -} - - -inline void init_executor(std::shared_ptr ref, - std::shared_ptr& exec, - CUstream_st* stream = nullptr) -{ - { - if (gko::CudaExecutor::get_num_devices() == 0) { - throw std::runtime_error{"No suitable CUDA devices"}; - } - exec = gko::CudaExecutor::create( - gko::experimental::mpi::map_rank_to_device_id( - MPI_COMM_WORLD, gko::CudaExecutor::get_num_devices()), - ref, std::make_shared(), stream); - } -} - - -inline void init_executor(std::shared_ptr ref, - std::shared_ptr& exec, - GKO_HIP_STREAM_STRUCT* stream = nullptr) -{ - if (gko::HipExecutor::get_num_devices() == 0) { - throw std::runtime_error{"No suitable HIP devices"}; - } - exec = gko::HipExecutor::create( - gko::experimental::mpi::map_rank_to_device_id( - MPI_COMM_WORLD, gko::HipExecutor::get_num_devices()), - ref, std::make_shared(), stream); -} - - -inline void init_executor(std::shared_ptr ref, - std::shared_ptr& exec) -{ - auto num_gpu_devices = gko::DpcppExecutor::get_num_devices("gpu"); - auto num_cpu_devices = gko::DpcppExecutor::get_num_devices("cpu"); - if (num_gpu_devices > 0) { - exec = gko::DpcppExecutor::create( - gko::experimental::mpi::map_rank_to_device_id(MPI_COMM_WORLD, - num_gpu_devices), - ref, "gpu"); - } else if (num_cpu_devices > 0) { - exec = gko::DpcppExecutor::create( - gko::experimental::mpi::map_rank_to_device_id(MPI_COMM_WORLD, - num_cpu_devices), - ref, "cpu"); - } else { - throw std::runtime_error{"No suitable DPC++ devices"}; - } -} +#include "test/utils/executor.hpp" class CommonMpiTestFixture : public ::testing::Test { @@ -125,9 +60,7 @@ class CommonMpiTestFixture : public ::testing::Test { : comm(MPI_COMM_WORLD), #if defined(GKO_TEST_NONDEFAULT_STREAM) && \ (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)) - - stream(gko::experimental::mpi::map_rank_to_device_id( - comm.get(), gko::EXEC_TYPE::get_num_devices())), + stream(ResourceEnvironment::rs.id), #endif ref{gko::ReferenceExecutor::create()} { From 20d32f69a28bfa92134047f80c0a2d7dbdf430f4 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 20 Jul 2023 14:14:09 +0200 Subject: [PATCH 260/583] add gtest_main.cpp directly to target --- cmake/create_test.cmake | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index e6ebc6523a2..96310e7f22f 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -45,20 +45,18 @@ function(ginkgo_set_test_target_properties test_target_name) target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") endif() if(set_properties_MPI_SIZE) - if(NOT TARGET ginkgo_gtest_mpi_main) - ginkgo_create_gtest_mpi_main() - endif() - set(gtest_main ginkgo_gtest_mpi_main MPI::MPI_CXX) + target_sources(${test_target_name} + PRIVATE + ${PROJECT_SOURCE_DIR}/core/test/gtest/ginkgo_mpi_main.cpp) else() - if(NOT TARGET ginkgo_gtest_main) - ginkgo_create_gtest_main() - endif() - set(gtest_main ginkgo_gtest_main) + target_sources(${test_target_name} + PRIVATE + ${PROJECT_SOURCE_DIR}/core/test/gtest/ginkgo_main.cpp) endif() target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE $<$:${GINKGO_COMPILER_FLAGS}>) target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR} 
${set_properties_ADDITIONAL_INCLUDES}) - target_link_libraries(${test_target_name} PRIVATE ginkgo ${gtest_main} GTest::GTest ${set_properties_ADDITIONAL_LIBRARIES}) + target_link_libraries(${test_target_name} PRIVATE ginkgo GTest::GTest ${set_properties_ADDITIONAL_LIBRARIES}) endfunction() function(ginkgo_add_cpu_resource_requirement_internal test_name local_cores mpi_size) From f94a07ed42c7e40831065a9a234265b923640968 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 20 Jul 2023 14:15:01 +0200 Subject: [PATCH 261/583] simplify resource group --- cmake/create_test.cmake | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 96310e7f22f..1d18d07b516 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -59,16 +59,6 @@ function(ginkgo_set_test_target_properties test_target_name) target_link_libraries(${test_target_name} PRIVATE ginkgo GTest::GTest ${set_properties_ADDITIONAL_LIBRARIES}) endfunction() -function(ginkgo_add_cpu_resource_requirement_internal test_name local_cores mpi_size) - if (mpi_size) - math(EXPR cores "${mpi_size} * ${local_cores}") - else () - set(cores ${local_cores}) - endif () - set_property(TEST ${test_name} PROPERTY - RESOURCE_GROUPS "cpus:${cores}") -endfunction() - function(ginkgo_add_resource_requirement test_name) cmake_parse_arguments(PARSE_ARGV 1 add_rr "${gko_test_option_args}" "${gko_test_single_args}" "") if(add_rr_NO_RESOURCES) @@ -111,12 +101,9 @@ function(ginkgo_add_resource_requirement test_name) if(NOT add_rr_MPI_SIZE) set(add_rr_MPI_SIZE 1) endif() - foreach(unused RANGE ${MPI_SIZE}) - list(APPEND resources "${single_resource}") - endforeach() set_property(TEST ${test_name} PROPERTY - RESOURCE_GROUPS ${resources}) + RESOURCE_GROUPS "${add_rr_MPI_SIZE},${single_resource}") endfunction() From a2d0835dcf038bfce670a945a47d09a4e481b197 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 20 Jul 2023 14:37:34 +0200 Subject: [PATCH 262/583] rename cmake parameters --- cmake/create_test.cmake | 201 +++++++++++++++++++++------------------- 1 file changed, 104 insertions(+), 97 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 1d18d07b516..6ce37976f84 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -1,4 +1,4 @@ -set(gko_test_resource_args "LOCAL_CORES;PERCENT;TYPE") +set(gko_test_resource_args "RESOURCE_LOCAL_CORES;RESOURCE_PERCENT;RESOURCE_TYPE") set(gko_test_single_args "MPI_SIZE;${gko_test_resource_args}") set(gko_test_multi_args "DISABLE_EXECUTORS;ADDITIONAL_LIBRARIES;ADDITIONAL_INCLUDES") set(gko_test_option_args "NO_RESOURCES") @@ -34,13 +34,13 @@ function(ginkgo_set_test_target_properties test_target_name) cmake_parse_arguments(PARSE_ARGV 1 set_properties "" "${gko_test_single_args}" "${gko_test_multi_args}") if (GINKGO_FAST_TESTS) target_compile_definitions(${test_target_name} PRIVATE GINKGO_FAST_TESTS) - endif () + endif() if (GINKGO_TEST_NONDEFAULT_STREAM) target_compile_definitions(${test_target_name} PRIVATE GKO_TEST_NONDEFAULT_STREAM) - endif () + endif() if (GINKGO_COMPILING_DPCPP_TEST AND GINKGO_DPCPP_SINGLE_MODE) target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) - endif () + endif() if(GINKGO_CHECK_CIRCULAR_DEPS) target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") endif() @@ -65,37 +65,37 @@ function(ginkgo_add_resource_requirement test_name) return() endif() - if (NOT add_rr_TYPE) + if (NOT add_rr_RESOURCE_TYPE) 
message(FATAL_ERROR "Need to provide resource type used by test.") endif () - if(add_rr_TYPE STREQUAL "ref") + if(add_rr_RESOURCE_TYPE STREQUAL "ref") set(single_resource "cpus:1") - elseif(add_rr_TYPE STREQUAL "cpu") - if(NOT add_rr_CORES) - set(add_rr_CORES 4) # perhaps get this from environment variable? + elseif(add_rr_RESOURCE_TYPE STREQUAL "cpu") + if(NOT add_rr_RESOURCE_LOCAL_CORES) + set(add_rr_RESOURCE_LOCAL_CORES 4) # perhaps get this from environment variable? endif() - if(NOT add_rr_CORES MATCHES "^[0-9]+") - message(FATAL_ERROR "Resource specification is invalid: CORE=${add_rr_CORES}") + if(NOT add_rr_RESOURCE_LOCAL_CORES MATCHES "^[0-9]+") + message(FATAL_ERROR "Resource specification is invalid: RESOURCE_LOCAL_CORE=${add_rr_RESOURCE_LOCAL_CORES}") endif() - set(single_resource "cpus:${add_rr_CORES}") - elseif(add_rr_TYPE STREQUAL "gpu") - if(NOT add_rr_PERCENTAGE) - set(add_rr_PERCENTAGE 50) + set(single_resource "cpus:${add_rr_RESOURCE_LOCAL_CORES}") + elseif(add_rr_RESOURCE_TYPE STREQUAL "gpu") + if(NOT add_rr_RESOURCE_PERCENTAGE) + set(add_rr_RESOURCE_PERCENTAGE 50) endif() if(add_rr_MPI_SIZE GREATER 1) - set(add_rr_PERCENTAGE 100) + set(add_rr_RESOURCE_PERCENTAGE 100) endif() - if(NOT add_rr_PERCENTAGE MATCHES "^[0-9]([0-9][0-9]?)?" - OR add_rr_PERCENTAGE LESS 0 - OR add_rr_PERCENTAGE GREATER 100) - message(FATAL_ERROR "Resource specification is invalid: PERCENTAGE=${add_rr_PERCENTAGE}") + if(NOT add_rr_RESOURCE_PERCENTAGE MATCHES "^[0-9]([0-9][0-9]?)?" + OR add_rr_RESOURCE_PERCENTAGE LESS 0 + OR add_rr_RESOURCE_PERCENTAGE GREATER 100) + message(FATAL_ERROR "Resource specification is invalid: RESOURCE_PERCENTAGE=${add_rr_RESOURCE_PERCENTAGE}") endif() - set(single_resource "gpus:${add_rr_PERCENTAGE}") + set(single_resource "gpus:${add_rr_RESOURCE_PERCENTAGE}") else() - message(FATAL_ERROR "Unrecognized resource type ${add_rr_TYPE}, allowed are: ref, cpu, gpu.") + message(FATAL_ERROR "Unrecognized resource type ${add_rr_RESOURCE_TYPE}, allowed are: ref, cpu, gpu.") endif() if(NOT add_rr_MPI_SIZE) @@ -121,30 +121,30 @@ function(ginkgo_add_test test_name test_target_name) set_target_properties(${test_target_name} PROPERTIES OUTPUT_NAME ${test_name}) if (add_test_MPI_SIZE) add_test(NAME ${REL_BINARY_DIR}/${test_name} - COMMAND - ${MPIEXEC_EXECUTABLE} - ${MPIEXEC_NUMPROC_FLAG} - ${add_test_MPI_SIZE} - "$" - WORKING_DIRECTORY "$") - else () + COMMAND + ${MPIEXEC_EXECUTABLE} + ${MPIEXEC_NUMPROC_FLAG} + ${add_test_MPI_SIZE} + "$" + WORKING_DIRECTORY "$") + else() add_test(NAME ${REL_BINARY_DIR}/${test_name} - COMMAND ${test_target_name} - WORKING_DIRECTORY "$") - endif () + COMMAND ${test_target_name} + WORKING_DIRECTORY "$") + endif() ginkgo_add_resource_requirement(${REL_BINARY_DIR}/${test_name} ${ARGN}) set(test_preload) if (GINKGO_TEST_NONDEFAULT_STREAM AND GINKGO_BUILD_CUDA) set(test_preload $:${test_preload}) - endif () + endif() if (GINKGO_TEST_NONDEFAULT_STREAM AND GINKGO_BUILD_HIP AND GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") set(test_preload $:${test_preload}) - endif () - if (test_preload) + endif() + if(test_preload) set_tests_properties(${REL_BINARY_DIR}/${test_name} PROPERTIES ENVIRONMENT LD_PRELOAD=${test_preload}) - endif () + endif() endfunction() ## Normal test @@ -153,7 +153,7 @@ function(ginkgo_create_test test_name) add_executable(${test_target_name} ${test_name}.cpp) target_link_libraries(${test_target_name}) ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} TYPE ref) + 
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE ref) endfunction(ginkgo_create_test) ## Test compiled with dpcpp @@ -164,11 +164,11 @@ function(ginkgo_create_dpcpp_test test_name) target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS}) target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} TYPE gpu) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE gpu) # Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test. if (MKL_ENV) set_tests_properties(${test_target_name} PROPERTIES ENVIRONMENT "${MKL_ENV}") - endif () + endif() endfunction(ginkgo_create_dpcpp_test) ## Test compiled with CUDA @@ -182,23 +182,23 @@ function(ginkgo_create_cuda_test_internal test_name filename test_target_name) add_executable(${test_target_name} ${filename}) target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_CUDA) target_compile_options(${test_target_name} - PRIVATE + PRIVATE $<$:${GINKGO_CUDA_COMPILER_FLAGS}>) - if (MSVC) + if(MSVC) target_compile_options(${test_target_name} - PRIVATE + PRIVATE $<$:--extended-lambda --expt-relaxed-constexpr>) - elseif (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") target_compile_options(${test_target_name} - PRIVATE + PRIVATE $<$:--expt-extended-lambda --expt-relaxed-constexpr>) - endif () + endif() # we handle CUDA architecture flags for now, disable CMake handling - if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF) - endif () + endif() ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} TYPE gpu) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE gpu) endfunction(ginkgo_create_cuda_test_internal) ## Test compiled with HIP @@ -213,48 +213,48 @@ function(ginkgo_create_hip_test_internal test_name filename test_target_name add set(GINKGO_TEST_HIP_DEFINES -DGKO_COMPILING_HIP ${additional_flags}) if (GINKGO_FAST_TESTS) list(APPEND GINKGO_TEST_HIP_DEFINES -DGINKGO_FAST_TESTS) - endif () + endif() if (GINKGO_TEST_NONDEFAULT_STREAM) list(APPEND GINKGO_TEST_HIP_DEFINES -DGKO_TEST_NONDEFAULT_STREAM) - endif () + endif() # NOTE: With how HIP works, passing the flags `HIPCC_OPTIONS` etc. here # creates a redefinition of all flags. This creates some issues with `nvcc`, # but `clang` seems fine with the redefinitions. if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_NVIDIA_REGEX}") hip_add_executable(${test_target_name} ${filename} - # If `FindHIP.cmake`, namely `HIP_PARSE_HIPCC_OPTIONS` macro and - # call gets fixed, uncomment this. - HIPCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} # ${GINKGO_HIPCC_OPTIONS} - # NVCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_NVCC_OPTIONS} - # CLANG_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_CLANG_OPTIONS} - --expt-relaxed-constexpr --expt-extended-lambda - ) - else () # hcc/clang + # If `FindHIP.cmake`, namely `HIP_PARSE_HIPCC_OPTIONS` macro and + # call gets fixed, uncomment this. 
+ HIPCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} # ${GINKGO_HIPCC_OPTIONS} + # NVCC_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_NVCC_OPTIONS} + # CLANG_OPTIONS ${GINKGO_TEST_HIP_DEFINES} ${GINKGO_HIP_CLANG_OPTIONS} + --expt-relaxed-constexpr --expt-extended-lambda + ) + else() # hcc/clang hip_add_executable(${test_target_name} ${filename} - HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} ${GINKGO_TEST_HIP_DEFINES} - NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} - CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} - ) - endif () + HIPCC_OPTIONS ${GINKGO_HIPCC_OPTIONS} ${GINKGO_TEST_HIP_DEFINES} + NVCC_OPTIONS ${GINKGO_HIP_NVCC_OPTIONS} + CLANG_OPTIONS ${GINKGO_HIP_CLANG_OPTIONS} + ) + endif() # Let's use a normal compiler for linking set_target_properties(${test_target_name} PROPERTIES LINKER_LANGUAGE CXX) target_include_directories(${test_target_name} - PRIVATE - # Only `math` requires it so far, but it's much easier - # to put these this way. - ${GINKGO_HIP_THRUST_PATH} - # Only `exception_helpers` requires these so far, but it's much easier - # to put these this way. - ${HIPBLAS_INCLUDE_DIRS} - ${HIPFFT_INCLUDE_DIRS} - ${hiprand_INCLUDE_DIRS} - ${HIPSPARSE_INCLUDE_DIRS} - ) + PRIVATE + # Only `math` requires it so far, but it's much easier + # to put these this way. + ${GINKGO_HIP_THRUST_PATH} + # Only `exception_helpers` requires these so far, but it's much easier + # to put these this way. + ${HIPBLAS_INCLUDE_DIRS} + ${HIPFFT_INCLUDE_DIRS} + ${hiprand_INCLUDE_DIRS} + ${HIPSPARSE_INCLUDE_DIRS} + ) ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} TYPE gpu) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE gpu) endfunction(ginkgo_create_hip_test_internal) @@ -270,30 +270,30 @@ function(ginkgo_create_omp_test_internal test_name filename test_target_name) target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_OMP) target_link_libraries(${test_target_name} PRIVATE OpenMP::OpenMP_CXX) ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} TYPE cpu) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE cpu) endfunction() ## Common test compiled with the host compiler, one target for each enabled backend function(ginkgo_create_common_test test_name) - if (GINKGO_BUILD_OMP) + if(GINKGO_BUILD_OMP) ginkgo_create_common_test_internal(${test_name} OmpExecutor omp ${ARGN}) - endif () - if (GINKGO_BUILD_HIP) + endif() + if(GINKGO_BUILD_HIP) ginkgo_create_common_test_internal(${test_name} HipExecutor hip ${ARGN}) - endif () - if (GINKGO_BUILD_CUDA) + endif() + if(GINKGO_BUILD_CUDA) ginkgo_create_common_test_internal(${test_name} CudaExecutor cuda ${ARGN}) - endif () - if (GINKGO_BUILD_DPCPP) + endif() + if(GINKGO_BUILD_DPCPP) ginkgo_create_common_test_internal(${test_name} DpcppExecutor dpcpp ${ARGN}) - endif () + endif() endfunction(ginkgo_create_common_test) function(ginkgo_create_common_test_internal test_name exec_type exec) cmake_parse_arguments(PARSE_ARGV 3 common_test "" "${gko_test_single_args}" "${gko_test_multi_args}") - if (exec IN_LIST common_test_DISABLE_EXECUTORS) + if(exec IN_LIST common_test_DISABLE_EXECUTORS) return() - endif () + endif() if (exec STREQUAL reference) set(test_resource_type ref) elseif (exec STREQUAL omp) @@ -303,45 +303,52 @@ function(ginkgo_create_common_test_internal test_name exec_type exec) endif () ginkgo_build_test_name(${test_name} test_target_name) string(TOUPPER ${exec} exec_upper) + # set up 
actual test set(test_target_name ${test_target_name}_${exec}) add_executable(${test_target_name} ${test_name}.cpp) + + # also need to add runtime libraries for other backends + if (exec STREQUAL omp) + target_link_libraries(${test_target_name} PRIVATE OpenMP::OpenMP_CXX) + endif () + target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=${exec_type} EXEC_NAMESPACE=${exec} GKO_COMPILING_${exec_upper}) target_link_libraries(${test_target_name} PRIVATE ${common_test_ADDITIONAL_LIBRARIES}) # use float for DPC++ if necessary - if ((exec STREQUAL "dpcpp") AND GINKGO_DPCPP_SINGLE_MODE) + if((exec STREQUAL "dpcpp") AND GINKGO_DPCPP_SINGLE_MODE) target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1) target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) - endif () + endif() ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name}_${exec} ${test_target_name} ${ARGN} TYPE ${test_resource_type}) + ginkgo_add_test(${test_name}_${exec} ${test_target_name} ${ARGN} RESOURCE_TYPE ${test_resource_type}) endfunction(ginkgo_create_common_test_internal) ## Common test compiled with the device compiler, one target for each enabled backend function(ginkgo_create_common_device_test test_name) cmake_parse_arguments(PARSE_ARGV 1 common_device_test "" "${gko_test_single_args}" "${gko_test_multi_args}") ginkgo_build_test_name(${test_name} test_target_name) - if (GINKGO_BUILD_DPCPP) + if(GINKGO_BUILD_DPCPP) ginkgo_create_common_test_internal(${test_name} DpcppExecutor dpcpp ${ARGN}) target_compile_features(${test_target_name}_dpcpp PRIVATE cxx_std_17) target_compile_options(${test_target_name}_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS}) target_link_options(${test_target_name}_dpcpp PRIVATE -fsycl-device-lib=all -fsycl-device-code-split=per_kernel) - endif () - if (GINKGO_BUILD_OMP) + endif() + if(GINKGO_BUILD_OMP) ginkgo_create_common_test_internal(${test_name} OmpExecutor omp ${ARGN}) target_link_libraries(${test_target_name}_omp PUBLIC OpenMP::OpenMP_CXX) - endif () - if (GINKGO_BUILD_CUDA) + endif() + if(GINKGO_BUILD_CUDA) # need to make a separate file for this, since we can't set conflicting properties on the same file configure_file(${test_name}.cpp ${test_name}.cu COPYONLY) ginkgo_create_cuda_test_internal(${test_name}_cuda ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.cu ${test_target_name}_cuda ${ARGN}) target_compile_definitions(${test_target_name}_cuda PRIVATE EXEC_TYPE=CudaExecutor EXEC_NAMESPACE=cuda) - endif () - if (GINKGO_BUILD_HIP) + endif() + if(GINKGO_BUILD_HIP) # need to make a separate file for this, since we can't set conflicting properties on the same file configure_file(${test_name}.cpp ${test_name}.hip.cpp COPYONLY) ginkgo_create_hip_test_internal(${test_name}_hip ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.hip.cpp ${test_target_name}_hip "-std=c++14;-DEXEC_TYPE=HipExecutor;-DEXEC_NAMESPACE=hip" ${ARGN}) - endif () + endif() endfunction(ginkgo_create_common_device_test) ## Common test compiled with the host compiler for all enabled backends and Reference From a2af9d9a18d1372e28bb071caad195592faee4e6 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 20 Jul 2023 14:37:54 +0200 Subject: [PATCH 263/583] simplify parsing --- core/test/gtest/environments.hpp | 48 ++++++++++---------------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index 3f93ea95b8a..4cfb2a89959 100644 --- a/core/test/gtest/environments.hpp +++ 
b/core/test/gtest/environments.hpp @@ -8,24 +8,13 @@ #include -std::vector split(const std::string& s, char delimiter = ',') -{ - std::istringstream iss(s); - std::vector tokens; - std::string token; - while (std::getline(iss, token, delimiter)) { - tokens.push_back(token); - } - return tokens; -} - - struct resource { int id; int slots; }; -resource parse_single_resource(const std::string& resource_string) + +inline resource parse_single_resource(const std::string& resource_string) { std::regex re(R"(id\:(\d+),slots\:(\d+))"); std::smatch match; @@ -37,19 +26,8 @@ resource parse_single_resource(const std::string& resource_string) return resource{std::stoi(match[1]), std::stoi(match[2])}; } -std::vector parse_all_resources(const std::string& resource_string) -{ - auto resource_strings = split(resource_string, ';'); - - std::vector resources; - for (const auto& rs : resource_strings) { - resources.push_back(parse_single_resource(rs)); - } - return resources; -} - -std::vector get_ctest_resources() +inline std::vector get_ctest_resources() { auto rs_count_env = std::getenv("CTEST_RESOURCE_GROUP_COUNT"); @@ -59,17 +37,19 @@ std::vector get_ctest_resources() auto rs_count = std::stoi(rs_count_env); - if (rs_count > 1) { - GKO_INVALID_STATE("Can handle only one resource group."); + std::vector resources; + + for (int i = 0; i < rs_count; ++i) { + std::string rs_group_env = "CTEST_RESOURCE_GROUP_" + std::to_string(i); + std::string rs_type = std::getenv(rs_group_env.c_str()); + std::transform(rs_type.begin(), rs_type.end(), rs_type.begin(), + [](auto c) { return std::toupper(c); }); + std::string rs_env = + std::getenv((rs_group_env + "_" + rs_type).c_str()); + resources.push_back(parse_single_resource(rs_env)); } - std::string rs_type = std::getenv("CTEST_RESOURCE_GROUP_0"); - std::transform(rs_type.begin(), rs_type.end(), rs_type.begin(), - [](auto c) { return std::toupper(c); }); - std::string rs_env = - std::getenv(std::string("CTEST_RESOURCE_GROUP_0_" + rs_type).c_str()); - std::cerr << rs_env << std::endl; - return parse_all_resources(rs_env); + return resources; } From 4117e931459a6a593481c1768b5b430cba242640 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 20 Jul 2023 14:38:13 +0200 Subject: [PATCH 264/583] use ginkgo_create_omp_test --- omp/test/base/CMakeLists.txt | 6 ++---- omp/test/matrix/CMakeLists.txt | 2 +- omp/test/reorder/CMakeLists.txt | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/omp/test/base/CMakeLists.txt b/omp/test/base/CMakeLists.txt index 4c511b6def7..cfd00fe28cf 100644 --- a/omp/test/base/CMakeLists.txt +++ b/omp/test/base/CMakeLists.txt @@ -1,4 +1,2 @@ -ginkgo_create_test(kernel_launch) -target_compile_definitions(omp_test_base_kernel_launch PRIVATE GKO_COMPILING_OMP) -target_link_libraries(omp_test_base_kernel_launch PRIVATE OpenMP::OpenMP_CXX) -ginkgo_create_test(index_set) +ginkgo_create_omp_test(kernel_launch) +ginkgo_create_omp_test(index_set) diff --git a/omp/test/matrix/CMakeLists.txt b/omp/test/matrix/CMakeLists.txt index 88ab52e9c3f..398921ce75a 100644 --- a/omp/test/matrix/CMakeLists.txt +++ b/omp/test/matrix/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_test(fbcsr_kernels) +ginkgo_create_omp_test(fbcsr_kernels) diff --git a/omp/test/reorder/CMakeLists.txt b/omp/test/reorder/CMakeLists.txt index 8987ae28a48..089e51c67c9 100644 --- a/omp/test/reorder/CMakeLists.txt +++ b/omp/test/reorder/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_test(rcm_kernels) +ginkgo_create_omp_test(rcm_kernels) From 84c5b0434fc4ce9fb3e54dc612b20aad90eca94a Mon Sep 
17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 11:53:44 +0200 Subject: [PATCH 265/583] use custom stream by default otherwise, the default stream is used in some places, e.g. initializing cublas. But it is not clear with which device the default stream is associated. Thus, this now sets the device id correctly for the new stream --- test/utils/executor.hpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index ad4621d5c31..7afd2b0e4d9 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -124,14 +124,12 @@ class CommonTestFixture : public ::testing::Test { CommonTestFixture() : -#if defined(GKO_TEST_NONDEFAULT_STREAM) && \ - (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)) +#if defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP) stream(ResourceEnvironment::rs.id), #endif ref{gko::ReferenceExecutor::create()} { -#if defined(GKO_TEST_NONDEFAULT_STREAM) && \ - (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)) +#if defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP) init_executor(ref, exec, stream.get()); #else init_executor(ref, exec); @@ -145,13 +143,11 @@ class CommonTestFixture : public ::testing::Test { } } -#ifdef GKO_TEST_NONDEFAULT_STREAM #ifdef GKO_COMPILING_CUDA gko::cuda_stream stream; #endif #ifdef GKO_COMPILING_HIP gko::hip_stream stream; -#endif #endif std::shared_ptr ref; std::shared_ptr exec; From 6ec9475f4102f45f3b6d26040282d0420f4e126f Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 13:33:30 +0200 Subject: [PATCH 266/583] set device-id for each test this is necessary, since some test call the kernels directly and not through the executor. In this case, the setting of the device id by the executor is skipped, which leads to these kernel not run. --- cuda/test/utils.hpp | 16 +++++++--------- hip/test/utils.hip.hpp | 16 +++++++--------- test/utils/executor.hpp | 2 ++ test/utils/mpi/executor.hpp | 11 ++++------- 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp index 58d310024bd..f35cb8d4c12 100644 --- a/cuda/test/utils.hpp +++ b/cuda/test/utils.hpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/test/gtest/environments.hpp" #include "cuda/base/device.hpp" @@ -51,13 +52,11 @@ class CudaTestFixture : public ::testing::Test { protected: CudaTestFixture() : ref(gko::ReferenceExecutor::create()), -#ifdef GKO_TEST_NONDEFAULT_STREAM - stream(0), - exec(gko::CudaExecutor::create( - 0, ref, std::make_shared(), stream.get())) -#else - exec(gko::CudaExecutor::create(0, ref)) -#endif + stream(ResourceEnvironment::rs.id), + exec(gko::CudaExecutor::create(ResourceEnvironment::rs.id, ref, std::make_shared< + gko::CudaAllocator>(), + stream.get())), + guard(exec->get_scoped_device_id_guard()) {} void TearDown() @@ -68,11 +67,10 @@ class CudaTestFixture : public ::testing::Test { } } -#ifdef GKO_TEST_NONDEFAULT_STREAM gko::cuda_stream stream; -#endif std::shared_ptr ref; std::shared_ptr exec; + gko::scoped_device_id_guard guard; }; diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp index dcecc8d2522..1c57467b451 100644 --- a/hip/test/utils.hip.hpp +++ b/hip/test/utils.hip.hpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
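// [Editor's sketch of the intent, not part of the patch] Holding a
// scoped_device_id_guard as a fixture member keeps the chosen CUDA/HIP device
// active for the whole test, so kernels that are called directly (bypassing the
// executor, which would otherwise set the device itself) still run on the
// device assigned through the CTest resources. Roughly:
//
//     auto ref = gko::ReferenceExecutor::create();
//     auto exec = gko::CudaExecutor::create(device_id, ref);
//     auto guard = exec->get_scoped_device_id_guard();  // activates device_id
//     // direct kernel invocations now target device_id until guard is destroyed
//
// where device_id stands for the id parsed from the CTest resource group.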
#include +#include "core/test/gtest/environments.hpp" #include "hip/base/device.hpp" @@ -51,13 +52,11 @@ class HipTestFixture : public ::testing::Test { protected: HipTestFixture() : ref(gko::ReferenceExecutor::create()), -#ifdef GKO_TEST_NONDEFAULT_STREAM - stream(0), - exec(gko::HipExecutor::create( - 0, ref, std::make_shared(), stream.get())) -#else - exec(gko::HipExecutor::create(0, ref)) -#endif + stream(ResourceEnvironment::rs.id), + exec(gko::HipExecutor::create(ResourceEnvironment::rs.id, ref, std::make_shared< + gko::HipAllocator>(), + stream.get())), + guard(exec->get_scoped_device_id_guard()) {} void TearDown() @@ -68,11 +67,10 @@ class HipTestFixture : public ::testing::Test { } } -#ifdef GKO_TEST_NONDEFAULT_STREAM gko::hip_stream stream; -#endif std::shared_ptr ref; std::shared_ptr exec; + gko::scoped_device_id_guard guard; }; diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 7afd2b0e4d9..d52b8083ac8 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -134,6 +134,7 @@ class CommonTestFixture : public ::testing::Test { #else init_executor(ref, exec); #endif + guard = exec->get_scoped_device_id_guard(); } void TearDown() final @@ -151,6 +152,7 @@ class CommonTestFixture : public ::testing::Test { #endif std::shared_ptr ref; std::shared_ptr exec; + gko::scoped_device_id_guard guard; }; diff --git a/test/utils/mpi/executor.hpp b/test/utils/mpi/executor.hpp index 4eba5593c90..f317f60eb35 100644 --- a/test/utils/mpi/executor.hpp +++ b/test/utils/mpi/executor.hpp @@ -58,18 +58,17 @@ class CommonMpiTestFixture : public ::testing::Test { CommonMpiTestFixture() : comm(MPI_COMM_WORLD), -#if defined(GKO_TEST_NONDEFAULT_STREAM) && \ - (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)) +#if defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP) stream(ResourceEnvironment::rs.id), #endif ref{gko::ReferenceExecutor::create()} { -#if defined(GKO_TEST_NONDEFAULT_STREAM) && \ - (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)) +#if defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP) init_executor(ref, exec, stream.get()); #else init_executor(ref, exec); #endif + guard = exec->get_scoped_device_id_guard(); } void TearDown() final @@ -81,17 +80,15 @@ class CommonMpiTestFixture : public ::testing::Test { gko::experimental::mpi::communicator comm; -#ifdef GKO_TEST_NONDEFAULT_STREAM #ifdef GKO_COMPILING_CUDA gko::cuda_stream stream; #endif #ifdef GKO_COMPILING_HIP gko::hip_stream stream; #endif -#endif - std::shared_ptr ref; std::shared_ptr exec; + gko::scoped_device_id_guard guard; }; From 34e59dd094dc47633cb72345df393669e01f17e5 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 13:33:49 +0200 Subject: [PATCH 267/583] add ctest resource settings to logging output --- core/test/gtest/environments.hpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index 4cfb2a89959..a7aedbc102c 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -30,6 +30,7 @@ inline resource parse_single_resource(const std::string& resource_string) inline std::vector get_ctest_resources() { auto rs_count_env = std::getenv("CTEST_RESOURCE_GROUP_COUNT"); + std::cerr << "CTEST_RESOURCE_GROUP_COUNT=" << rs_count_env << std::endl; if (!rs_count_env) { return {{0, 1}}; @@ -42,10 +43,14 @@ inline std::vector get_ctest_resources() for (int i = 0; i < rs_count; ++i) { std::string rs_group_env = "CTEST_RESOURCE_GROUP_" + 
std::to_string(i); std::string rs_type = std::getenv(rs_group_env.c_str()); + std::cerr << rs_group_env << "=" << rs_type << std::endl; + std::transform(rs_type.begin(), rs_type.end(), rs_type.begin(), [](auto c) { return std::toupper(c); }); - std::string rs_env = - std::getenv((rs_group_env + "_" + rs_type).c_str()); + std::string rs_current_group = rs_group_env + "_" + rs_type; + std::string rs_env = std::getenv(rs_current_group.c_str()); + std::cerr << rs_current_group << "=" << rs_env << std::endl; + resources.push_back(parse_single_resource(rs_env)); } From 7c83c349efefa9bb2348220a2fb0f2c44ae2669e Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 14:37:33 +0200 Subject: [PATCH 268/583] fixes schwarz preconditioner test --- test/mpi/preconditioner/schwarz.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/mpi/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp index 8586711a114..3c9e3a8d69f 100644 --- a/test/mpi/preconditioner/schwarz.cpp +++ b/test/mpi/preconditioner/schwarz.cpp @@ -101,14 +101,14 @@ class SchwarzPreconditioner : public CommonMpiTestFixture { SchwarzPreconditioner() - : size{8, 8}, mat_input{size, {{0, 0, 2}, {0, 1, -1}, {1, 0, -1}, - {1, 1, 2}, {1, 2, -1}, {2, 1, -1}, - {2, 2, 2}, {2, 3, -1}, {3, 2, -1}, - {3, 3, 2}, {3, 4, -1}, {4, 3, -1}, - {4, 4, 2}, {4, 5, -1}, {5, 4, -1}, - {5, 5, 2}, {5, 6, -1}, {6, 5, -1}, - {6, 6, 2}, {6, 7, -1}, {7, 6, -1}, - {7, 7, 2}}} + : CommonMpiTestFixture(), + size{8, 8}, + mat_input{size, + {{0, 0, 2}, {0, 1, -1}, {1, 0, -1}, {1, 1, 2}, {1, 2, -1}, + {2, 1, -1}, {2, 2, 2}, {2, 3, -1}, {3, 2, -1}, {3, 3, 2}, + {3, 4, -1}, {4, 3, -1}, {4, 4, 2}, {4, 5, -1}, {5, 4, -1}, + {5, 5, 2}, {5, 6, -1}, {6, 5, -1}, {6, 6, 2}, {6, 7, -1}, + {7, 6, -1}, {7, 7, 2}}} { row_part = Partition::build_from_contiguous( exec, gko::array( From 9a5fd2c00d2ed8fb41fbdc1deb13dab6098ec2ea Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 14:38:11 +0200 Subject: [PATCH 269/583] use ginkgo_create_cuda_test consistently --- cuda/test/base/CMakeLists.txt | 6 +++--- cuda/test/base/{index_set.cpp => index_set.cu} | 0 cuda/test/base/{memory.cpp => memory.cu} | 0 cuda/test/reorder/CMakeLists.txt | 2 +- cuda/test/reorder/{rcm_kernels.cpp => rcm_kernels.cu} | 0 cuda/test/utils/CMakeLists.txt | 2 +- cuda/test/utils/{assertions_test.cpp => assertions_test.cu} | 0 7 files changed, 5 insertions(+), 5 deletions(-) rename cuda/test/base/{index_set.cpp => index_set.cu} (100%) rename cuda/test/base/{memory.cpp => memory.cu} (100%) rename cuda/test/reorder/{rcm_kernels.cpp => rcm_kernels.cu} (100%) rename cuda/test/utils/{assertions_test.cpp => assertions_test.cu} (100%) diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index a213e65277a..f78e6e653fe 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,13 +1,13 @@ ginkgo_create_cuda_test(array) ginkgo_create_cuda_test(cuda_executor) -ginkgo_create_test(index_set) +ginkgo_create_cuda_test(index_set) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_cuda_test(cuda_executor_topology ADDITIONAL_LIBRARIES NUMA::NUMA) -endif() +endif () ginkgo_create_cuda_test(exception_helpers) ginkgo_create_cuda_test(kernel_launch) ginkgo_create_cuda_test(lin_op) ginkgo_create_cuda_test(math) -ginkgo_create_test(memory) +ginkgo_create_cuda_test(memory) ginkgo_create_cuda_test(scoped_device_id) diff --git a/cuda/test/base/index_set.cpp b/cuda/test/base/index_set.cu similarity index 100% rename 
from cuda/test/base/index_set.cpp rename to cuda/test/base/index_set.cu diff --git a/cuda/test/base/memory.cpp b/cuda/test/base/memory.cu similarity index 100% rename from cuda/test/base/memory.cpp rename to cuda/test/base/memory.cu diff --git a/cuda/test/reorder/CMakeLists.txt b/cuda/test/reorder/CMakeLists.txt index 108e3b57dd5..e6cd8c0f5d2 100644 --- a/cuda/test/reorder/CMakeLists.txt +++ b/cuda/test/reorder/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_test(rcm_kernels) \ No newline at end of file +ginkgo_create_cuda_test(rcm_kernels) diff --git a/cuda/test/reorder/rcm_kernels.cpp b/cuda/test/reorder/rcm_kernels.cu similarity index 100% rename from cuda/test/reorder/rcm_kernels.cpp rename to cuda/test/reorder/rcm_kernels.cu diff --git a/cuda/test/utils/CMakeLists.txt b/cuda/test/utils/CMakeLists.txt index 06dffda5da0..28f5770856f 100644 --- a/cuda/test/utils/CMakeLists.txt +++ b/cuda/test/utils/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_test(assertions_test) +ginkgo_create_cuda_test(assertions_test) diff --git a/cuda/test/utils/assertions_test.cpp b/cuda/test/utils/assertions_test.cu similarity index 100% rename from cuda/test/utils/assertions_test.cpp rename to cuda/test/utils/assertions_test.cu From 2466da34682d50e0e90568686d88a4be1f336937 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 14:43:24 +0200 Subject: [PATCH 270/583] without resources, return the default number of omp threads --- core/test/gtest/environments.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index a7aedbc102c..4434185a4e0 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -33,7 +33,17 @@ inline std::vector get_ctest_resources() std::cerr << "CTEST_RESOURCE_GROUP_COUNT=" << rs_count_env << std::endl; if (!rs_count_env) { +#ifdef GKO_COMPILING_OMP + resource rs{}; +#pragma omp parallel +#pragma omp single + { + rs = resource{0, omp_get_num_threads()}; + } + return {rs}; +#else return {{0, 1}}; +#endif } auto rs_count = std::stoi(rs_count_env); From a72f56cb7a4d751d847d0b09cc3d5b029a185fea Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 15:22:15 +0200 Subject: [PATCH 271/583] fix check for no resources --- core/test/gtest/environments.hpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index 4434185a4e0..a678ce00ffd 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -5,6 +5,21 @@ #include +#ifdef GKO_COMPILING_OMP +#include +#endif + + +#ifdef GKO_COMPILING_CUDA +#include "cuda/base/device.hpp" +#endif + + +#ifdef GKO_COMPILING_HIP +#include "hip/base/device.hpp" +#endif + + #include @@ -32,7 +47,9 @@ inline std::vector get_ctest_resources() auto rs_count_env = std::getenv("CTEST_RESOURCE_GROUP_COUNT"); std::cerr << "CTEST_RESOURCE_GROUP_COUNT=" << rs_count_env << std::endl; - if (!rs_count_env) { + auto rs_count = rs_count_env ? 
std::stoi(rs_count_env) : 0; + + if (rs_count == 0) { #ifdef GKO_COMPILING_OMP resource rs{}; #pragma omp parallel @@ -46,8 +63,6 @@ inline std::vector get_ctest_resources() #endif } - auto rs_count = std::stoi(rs_count_env); - std::vector resources; for (int i = 0; i < rs_count; ++i) { @@ -81,8 +96,6 @@ class ResourceEnvironment : public ::testing::Environment { #ifdef GKO_COMPILING_OMP -#include - class OmpEnvironment : public ::testing::Environment { public: void SetUp() override @@ -101,8 +114,6 @@ class OmpEnvironment : public ::testing::Environment {}; #ifdef GKO_COMPILING_CUDA -#include "cuda/base/device.hpp" - class CudaEnvironment : public ::testing::Environment { public: void TearDown() override @@ -120,8 +131,6 @@ class CudaEnvironment : public ::testing::Environment {}; #ifdef GKO_COMPILING_HIP -#include "hip/base/device.hpp" - class HipEnvironment : public ::testing::Environment { public: void TearDown() override From 0aa6c718fd0cd549e004cca720799dbfdf0737f1 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 15:23:57 +0200 Subject: [PATCH 272/583] fix cmake resource parameters --- cmake/create_test.cmake | 2 +- cuda/test/base/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 6ce37976f84..34e27529e08 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -1,4 +1,4 @@ -set(gko_test_resource_args "RESOURCE_LOCAL_CORES;RESOURCE_PERCENT;RESOURCE_TYPE") +set(gko_test_resource_args "RESOURCE_LOCAL_CORES;RESOURCE_PERCENTAGE;RESOURCE_TYPE") set(gko_test_single_args "MPI_SIZE;${gko_test_resource_args}") set(gko_test_multi_args "DISABLE_EXECUTORS;ADDITIONAL_LIBRARIES;ADDITIONAL_INCLUDES") set(gko_test_option_args "NO_RESOURCES") diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index f78e6e653fe..bb99ba858a4 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -10,4 +10,4 @@ ginkgo_create_cuda_test(kernel_launch) ginkgo_create_cuda_test(lin_op) ginkgo_create_cuda_test(math) ginkgo_create_cuda_test(memory) -ginkgo_create_cuda_test(scoped_device_id) +ginkgo_create_cuda_test(scoped_device_id NO_RESOURCES) From 882dfcf6068225f2b286773c787cb86564b1c306 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 24 Jul 2023 16:06:20 +0200 Subject: [PATCH 273/583] allow 4 concurrent GPU tests --- cmake/create_test.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 34e27529e08..76330a26627 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -82,7 +82,7 @@ function(ginkgo_add_resource_requirement test_name) set(single_resource "cpus:${add_rr_RESOURCE_LOCAL_CORES}") elseif(add_rr_RESOURCE_TYPE STREQUAL "gpu") if(NOT add_rr_RESOURCE_PERCENTAGE) - set(add_rr_RESOURCE_PERCENTAGE 50) + set(add_rr_RESOURCE_PERCENTAGE 25) endif() if(add_rr_MPI_SIZE GREATER 1) set(add_rr_RESOURCE_PERCENTAGE 100) From cca63a1bc8885693acc74b128adf29b64c614d2c Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 31 Jul 2023 11:08:10 +0200 Subject: [PATCH 274/583] use different resource type per executor Co-authored-by: Tobias Ribizel --- cmake/create_test.cmake | 24 +++-- core/test/gtest/environments.hpp | 139 ++++++++++++++++++---------- core/test/gtest/ginkgo_main.cpp | 12 ++- core/test/gtest/ginkgo_mpi_main.cpp | 13 ++- cuda/test/utils.hpp | 8 +- hip/test/utils.hip.hpp | 4 +- test/utils/executor.hpp | 19 ++-- test/utils/mpi/executor.hpp | 7 +- 8 files changed, 
140 insertions(+), 86 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 76330a26627..1d3e041ff2a 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -70,17 +70,17 @@ function(ginkgo_add_resource_requirement test_name) endif () if(add_rr_RESOURCE_TYPE STREQUAL "ref") - set(single_resource "cpus:1") + set(single_resource "cpu:1") elseif(add_rr_RESOURCE_TYPE STREQUAL "cpu") if(NOT add_rr_RESOURCE_LOCAL_CORES) set(add_rr_RESOURCE_LOCAL_CORES 4) # perhaps get this from environment variable? endif() if(NOT add_rr_RESOURCE_LOCAL_CORES MATCHES "^[0-9]+") - message(FATAL_ERROR "Resource specification is invalid: RESOURCE_LOCAL_CORE=${add_rr_RESOURCE_LOCAL_CORES}") + message(FATAL_ERROR "Resource specification is invalid: RESOURCE_LOCAL_CORES=${add_rr_RESOURCE_LOCAL_CORES}") endif() - set(single_resource "cpus:${add_rr_RESOURCE_LOCAL_CORES}") - elseif(add_rr_RESOURCE_TYPE STREQUAL "gpu") + set(single_resource "cpu:${add_rr_RESOURCE_LOCAL_CORES}") + elseif(add_rr_RESOURCE_TYPE MATCHES "^(cuda|hip|sycl)gpu$") if(NOT add_rr_RESOURCE_PERCENTAGE) set(add_rr_RESOURCE_PERCENTAGE 25) endif() @@ -93,9 +93,9 @@ function(ginkgo_add_resource_requirement test_name) message(FATAL_ERROR "Resource specification is invalid: RESOURCE_PERCENTAGE=${add_rr_RESOURCE_PERCENTAGE}") endif() - set(single_resource "gpus:${add_rr_RESOURCE_PERCENTAGE}") + set(single_resource "${add_rr_RESOURCE_TYPE}:${add_rr_RESOURCE_PERCENTAGE}") else() - message(FATAL_ERROR "Unrecognized resource type ${add_rr_RESOURCE_TYPE}, allowed are: ref, cpu, gpu.") + message(FATAL_ERROR "Unrecognized resource type ${add_rr_RESOURCE_TYPE}, allowed are: ref, cpu, cudagpu, hipgpu, syclgpu.") endif() if(NOT add_rr_MPI_SIZE) @@ -164,7 +164,7 @@ function(ginkgo_create_dpcpp_test test_name) target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS}) target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE gpu) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE syclgpu) # Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test. 
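# [Editor's note, illustrative only] With the per-backend resource types above,
# the requirement attached to a test is equivalent to something like
#
#   set_property(TEST <test_name> PROPERTY RESOURCE_GROUPS "2,cudagpu:100")
#
# for a CUDA test with MPI_SIZE 2: two groups, each claiming 100 slots of one
# "cudagpu" entry from the resource spec file, so concurrent tests cannot
# oversubscribe that GPU. <test_name> is a placeholder, not a real target.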
if (MKL_ENV) set_tests_properties(${test_target_name} PROPERTIES ENVIRONMENT "${MKL_ENV}") @@ -198,7 +198,7 @@ function(ginkgo_create_cuda_test_internal test_name filename test_target_name) set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF) endif() ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE gpu) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE cudagpu) endfunction(ginkgo_create_cuda_test_internal) ## Test compiled with HIP @@ -254,7 +254,7 @@ function(ginkgo_create_hip_test_internal test_name filename test_target_name add ${HIPSPARSE_INCLUDE_DIRS} ) ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE gpu) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE hipgpu) endfunction(ginkgo_create_hip_test_internal) @@ -298,8 +298,12 @@ function(ginkgo_create_common_test_internal test_name exec_type exec) set(test_resource_type ref) elseif (exec STREQUAL omp) set(test_resource_type cpu) + elseif (exec STREQUAL cuda) + set(test_resource_type cudagpu) + elseif (exec STREQUAL hip) + set(test_resource_type hipgpu) else () - set(test_resource_type gpu) + set(test_resource_type syclgpu) endif () ginkgo_build_test_name(${test_name} test_target_name) string(TOUPPER ${exec} exec_upper) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index a678ce00ffd..0d433f1c9d1 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -21,76 +21,113 @@ #include +#include +#include -struct resource { +struct ctest_resource { int id; int slots; }; -inline resource parse_single_resource(const std::string& resource_string) +inline char* get_ctest_group(std::string resource_type, int group_id) { - std::regex re(R"(id\:(\d+),slots\:(\d+))"); - std::smatch match; - - if (!std::regex_match(resource_string, match, re)) { - GKO_INVALID_STATE("Can't parse resource string: " + resource_string); - } - - return resource{std::stoi(match[1]), std::stoi(match[2])}; + std::transform(resource_type.begin(), resource_type.end(), + resource_type.begin(), + [](auto c) { return std::toupper(c); }); + std::string rs_group_env = "CTEST_RESOURCE_GROUP_" + + std::to_string(group_id) + "_" + resource_type; + return std::getenv(rs_group_env.c_str()); } -inline std::vector get_ctest_resources() +inline ctest_resource parse_ctest_resources(std::string resource) { - auto rs_count_env = std::getenv("CTEST_RESOURCE_GROUP_COUNT"); - std::cerr << "CTEST_RESOURCE_GROUP_COUNT=" << rs_count_env << std::endl; - - auto rs_count = rs_count_env ? 
std::stoi(rs_count_env) : 0; - - if (rs_count == 0) { -#ifdef GKO_COMPILING_OMP - resource rs{}; -#pragma omp parallel -#pragma omp single - { - rs = resource{0, omp_get_num_threads()}; - } - return {rs}; -#else - return {{0, 1}}; -#endif - } - - std::vector resources; - - for (int i = 0; i < rs_count; ++i) { - std::string rs_group_env = "CTEST_RESOURCE_GROUP_" + std::to_string(i); - std::string rs_type = std::getenv(rs_group_env.c_str()); - std::cerr << rs_group_env << "=" << rs_type << std::endl; - - std::transform(rs_type.begin(), rs_type.end(), rs_type.begin(), - [](auto c) { return std::toupper(c); }); - std::string rs_current_group = rs_group_env + "_" + rs_type; - std::string rs_env = std::getenv(rs_current_group.c_str()); - std::cerr << rs_current_group << "=" << rs_env << std::endl; + std::regex re(R"(id\:(\d+),slots\:(\d+))"); + std::smatch match; - resources.push_back(parse_single_resource(rs_env)); + if (!std::regex_match(resource, match, re)) { + GKO_INVALID_STATE("Can't parse ctest_resource string: " + resource); } - return resources; + return ctest_resource{std::stoi(match[1]), std::stoi(match[2])}; } class ResourceEnvironment : public ::testing::Environment { public: - explicit ResourceEnvironment(resource rs_) : ::testing::Environment() + explicit ResourceEnvironment(int rank = 0, int size = 1) { - rs = rs_; +#if GINKGO_BUILD_MPI + if (size > 1) { + cuda_device_id = gko::experimental::mpi::map_rank_to_device_id( + MPI_COMM_WORLD, + std::max(gko::CudaExecutor::get_num_devices(), 1)); + hip_device_id = gko::experimental::mpi::map_rank_to_device_id( + MPI_COMM_WORLD, + std::max(gko::HipExecutor::get_num_devices(), 1)); + sycl_device_id = gko::experimental::mpi::map_rank_to_device_id( + MPI_COMM_WORLD, + std::max(gko::DpcppExecutor::get_num_devices("gpu"), 1)); + } +#endif + + auto rs_count_env = std::getenv("CTEST_RESOURCE_GROUP_COUNT"); + auto rs_count = rs_count_env ? std::stoi(rs_count_env) : 0; + if (rs_count == 0) { + std::cerr << "Running without CTest ctest_resource configuration" + << std::endl; + return; + } + if (rs_count != size) { + GKO_INVALID_STATE("Invalid resource group count: " + + std::to_string(rs_count)); + } + + // parse CTest ctest_resource group descriptions + if (rank == 0) { + std::cerr << "Running with CTest ctest_resource configuration:" + << std::endl; + } + // OpenMP CPU threads + if (auto rs_omp_env = get_ctest_group("cpu", rank)) { + auto resource = parse_ctest_resources(rs_omp_env); + omp_threads = resource.slots; + if (rank == 0) { + std::cerr << omp_threads << " CPU threads" << std::endl; + } + } + // CUDA GPUs + if (auto rs_cuda_env = get_ctest_group("cudagpu", rank)) { + auto resource = parse_ctest_resources(rs_cuda_env); + cuda_device_id = resource.id; + if (rank == 0) { + std::cerr << "CUDA device " << cuda_device_id << std::endl; + } + } + // HIP GPUs + if (auto rs_hip_env = get_ctest_group("hipgpu", rank)) { + auto resource = parse_ctest_resources(rs_hip_env); + hip_device_id = resource.id; + if (rank == 0) { + std::cerr << "HIP device " << hip_device_id << std::endl; + } + } + // SYCL GPUs (no other devices!) 
+ if (auto rs_sycl_env = get_ctest_group("syclgpu", rank)) { + auto resource = parse_ctest_resources(rs_sycl_env); + sycl_device_id = resource.id; + if (rank == 0) { + std::cerr << "SYCL device " << sycl_device_id << std::endl; + } + } } - static resource rs; + static int omp_threads; + static int cuda_device_id; + static int hip_device_id; + static int sycl_device_id; }; @@ -100,7 +137,9 @@ class OmpEnvironment : public ::testing::Environment { public: void SetUp() override { - omp_set_num_threads(ResourceEnvironment::rs.slots); + if (ResourceEnvironment::omp_threads > 0) { + omp_set_num_threads(ResourceEnvironment::omp_threads); + } } }; @@ -118,7 +157,7 @@ class CudaEnvironment : public ::testing::Environment { public: void TearDown() override { - gko::kernels::cuda::reset_device(ResourceEnvironment::rs.id); + gko::kernels::cuda::reset_device(ResourceEnvironment::cuda_device_id); } }; @@ -135,7 +174,7 @@ class HipEnvironment : public ::testing::Environment { public: void TearDown() override { - gko::kernels::hip::reset_device(ResourceEnvironment::rs.id); + gko::kernels::hip::reset_device(ResourceEnvironment::hip_device_id); } }; diff --git a/core/test/gtest/ginkgo_main.cpp b/core/test/gtest/ginkgo_main.cpp index 76a005a66e2..71117f2d73b 100644 --- a/core/test/gtest/ginkgo_main.cpp +++ b/core/test/gtest/ginkgo_main.cpp @@ -3,16 +3,18 @@ #include "core/test/gtest/environments.hpp" -resource ResourceEnvironment::rs = {}; + +int ResourceEnvironment::omp_threads = 0; +int ResourceEnvironment::cuda_device_id = 0; +int ResourceEnvironment::hip_device_id = 0; +int ResourceEnvironment::sycl_device_id = 0; + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - auto resources = get_ctest_resources(); - - ::testing::AddGlobalTestEnvironment( - new ResourceEnvironment(resources.front())); + ::testing::AddGlobalTestEnvironment(new ResourceEnvironment); ::testing::AddGlobalTestEnvironment(new CudaEnvironment); ::testing::AddGlobalTestEnvironment(new HipEnvironment); ::testing::AddGlobalTestEnvironment(new OmpEnvironment); diff --git a/core/test/gtest/ginkgo_mpi_main.cpp b/core/test/gtest/ginkgo_mpi_main.cpp index 934a3dcd3f5..945ec7ec7cd 100644 --- a/core/test/gtest/ginkgo_mpi_main.cpp +++ b/core/test/gtest/ginkgo_mpi_main.cpp @@ -375,7 +375,11 @@ class MPIWrapperPrinter : public ::testing::TestEventListener { } // namespace GTestMPIListener -resource ResourceEnvironment::rs = {}; +int ResourceEnvironment::omp_threads = 0; +int ResourceEnvironment::cuda_device_id = 0; +int ResourceEnvironment::hip_device_id = 0; +int ResourceEnvironment::sycl_device_id = 0; + int main(int argc, char** argv) { @@ -384,13 +388,12 @@ int main(int argc, char** argv) MPI_Init(&argc, &argv); MPI_Comm comm(MPI_COMM_WORLD); int rank; + int size; MPI_Comm_rank(comm, &rank); - - auto resources = get_ctest_resources(); + MPI_Comm_size(comm, &size); testing::AddGlobalTestEnvironment(new GTestMPIListener::MPIEnvironment); - ::testing::AddGlobalTestEnvironment( - new ResourceEnvironment(resources[rank])); + ::testing::AddGlobalTestEnvironment(new ResourceEnvironment(rank, size)); ::testing::AddGlobalTestEnvironment(new CudaEnvironment); ::testing::AddGlobalTestEnvironment(new HipEnvironment); ::testing::AddGlobalTestEnvironment(new OmpEnvironment); diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp index f35cb8d4c12..0410b3a6a22 100644 --- a/cuda/test/utils.hpp +++ b/cuda/test/utils.hpp @@ -52,10 +52,10 @@ class CudaTestFixture : public ::testing::Test { protected: CudaTestFixture() : 
ref(gko::ReferenceExecutor::create()), - stream(ResourceEnvironment::rs.id), - exec(gko::CudaExecutor::create(ResourceEnvironment::rs.id, ref, std::make_shared< - gko::CudaAllocator>(), - stream.get())), + stream(ResourceEnvironment::cuda_device_id), + exec(gko::CudaExecutor::create( + ResourceEnvironment::cuda_device_id, ref, std::make_shared< + gko::CudaAllocator>(), stream.get())), guard(exec->get_scoped_device_id_guard()) {} diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp index 1c57467b451..38fc3763ece 100644 --- a/hip/test/utils.hip.hpp +++ b/hip/test/utils.hip.hpp @@ -52,8 +52,8 @@ class HipTestFixture : public ::testing::Test { protected: HipTestFixture() : ref(gko::ReferenceExecutor::create()), - stream(ResourceEnvironment::rs.id), - exec(gko::HipExecutor::create(ResourceEnvironment::rs.id, ref, std::make_shared< + stream(ResourceEnvironment::hip_device_id), + exec(gko::HipExecutor::create(ResourceEnvironment::hip_device_id, ref, std::make_shared< gko::HipAllocator>(), stream.get())), guard(exec->get_scoped_device_id_guard()) diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index d52b8083ac8..082c3556381 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -80,7 +80,8 @@ inline void init_executor(std::shared_ptr ref, throw std::runtime_error{"No suitable CUDA devices"}; } exec = gko::CudaExecutor::create( - ResourceEnvironment::rs.id, ref, std::make_shared(), stream); + ResourceEnvironment::cuda_device_id, + ref, std::make_shared(), stream); } } @@ -93,7 +94,7 @@ inline void init_executor(std::shared_ptr ref, throw std::runtime_error{"No suitable HIP devices"}; } exec = gko::HipExecutor::create( - ResourceEnvironment::rs.id, ref, std::make_shared< + ResourceEnvironment::hip_device_id, ref, std::make_shared< gko::HipAllocator>(), stream); } @@ -102,11 +103,10 @@ inline void init_executor(std::shared_ptr ref, std::shared_ptr& exec) { if (gko::DpcppExecutor::get_num_devices("gpu") > 0) { - exec = - gko::DpcppExecutor::create(ResourceEnvironment::rs.id, ref, "gpu"); + exec = gko::DpcppExecutor::create(ResourceEnvironment::sycl_device_id, + ref, "gpu"); } else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) { - exec = - gko::DpcppExecutor::create(ResourceEnvironment::rs.id, ref, "cpu"); + exec = gko::DpcppExecutor::create(0, ref, "cpu"); } else { throw std::runtime_error{"No suitable DPC++ devices"}; } @@ -124,8 +124,11 @@ class CommonTestFixture : public ::testing::Test { CommonTestFixture() : -#if defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP) - stream(ResourceEnvironment::rs.id), +#ifdef GKO_COMPILING_CUDA + stream(ResourceEnvironment::cuda_device_id), +#endif +#ifdef GKO_COMPILING_HIP + stream(ResourceEnvironment::hip_device_id), #endif ref{gko::ReferenceExecutor::create()} { diff --git a/test/utils/mpi/executor.hpp b/test/utils/mpi/executor.hpp index f317f60eb35..f02834a5a1f 100644 --- a/test/utils/mpi/executor.hpp +++ b/test/utils/mpi/executor.hpp @@ -58,8 +58,11 @@ class CommonMpiTestFixture : public ::testing::Test { CommonMpiTestFixture() : comm(MPI_COMM_WORLD), -#if defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP) - stream(ResourceEnvironment::rs.id), +#ifdef GKO_COMPILING_CUDA + stream(ResourceEnvironment::cuda_device_id), +#endif +#ifdef GKO_COMPILING_HIP + stream(ResourceEnvironment::hip_device_id), #endif ref{gko::ReferenceExecutor::create()} { From 941382c94bec5a47490ad6ac90c1a0d5b310d826 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 31 Jul 2023 11:33:24 +0200 Subject: [PATCH 275/583] adds 
generator for ctest resource file Co-authored-by: Tobias Ribizel --- test/CMakeLists.txt | 1 + test/tools/CMakeLists.txt | 2 + test/tools/resource_file_generator.cpp | 72 ++++++++++++++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 test/tools/CMakeLists.txt create mode 100644 test/tools/resource_file_generator.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8a6eb305b6a..6e72dbdf0aa 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,3 +14,4 @@ add_subdirectory(preconditioner) add_subdirectory(reorder) add_subdirectory(solver) add_subdirectory(stop) +add_subdirectory(tools) diff --git a/test/tools/CMakeLists.txt b/test/tools/CMakeLists.txt new file mode 100644 index 00000000000..d3aa14b8ca7 --- /dev/null +++ b/test/tools/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(resource_file_generator resource_file_generator.cpp) +target_link_libraries(resource_file_generator Ginkgo::ginkgo ) diff --git a/test/tools/resource_file_generator.cpp b/test/tools/resource_file_generator.cpp new file mode 100644 index 00000000000..1070a569662 --- /dev/null +++ b/test/tools/resource_file_generator.cpp @@ -0,0 +1,72 @@ +#include + +#include +#include + + +std::vector split(const std::string& s, char delimiter = ',') +{ + std::istringstream iss(s); + std::vector tokens; + std::string token; + while (std::getline(iss, token, delimiter)) { + tokens.push_back(token); + } + return tokens; +} + +std::string create_json(const std::string& resources) +{ + std::string json; + json.append(R"({ + "version": { + "major": 1, + "minor": 0 + }, + "local": [ + { +)"); + for (const auto& line : split(resources, '\n')) { + json.append(R"( )"); + json.append(line); + json.append("\n"); + } + json.append(R"( } + ] +})"); + return json; +} + + +int main() +{ + auto num_cpu_threads = std::max(std::thread::hardware_concurrency(), 1u); + auto num_cuda_gpus = gko::CudaExecutor::get_num_devices(); + auto num_hip_gpus = gko::HipExecutor::get_num_devices(); + auto num_sycl_gpus = gko::DpcppExecutor::get_num_devices("gpu"); + + std::string cpus = R"("cpu": [{"id": "0", "slots": )" + + std::to_string(num_cpu_threads) + "}]"; + + std::string gpus = ""; + auto add_devices = [&](int num_devices, const std::string& name) { + if(num_devices){ + gpus.append(",\n"); + gpus += '"' + name + "\": [\n"; + } + for (int i = 0; i < num_devices; i++) { + if(i > 0){ + gpus.append(",\n"); + } + gpus+= R"( {"id": ")" + std::to_string(i) + R"(", "slots": 100})"; + } + if(num_devices){ + gpus.append("\n]"); + } + }; + add_devices(num_cuda_gpus, "cudagpu"); + add_devices(num_hip_gpus, "hipgpu"); + add_devices(num_sycl_gpus, "syclgpu"); + + std::cout << create_json(cpus + gpus) << std::endl; +} \ No newline at end of file From b5e191b3482aebd7b8d12787a78241ebe6020bd3 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 1 Aug 2023 18:06:03 +0200 Subject: [PATCH 276/583] review updates: - remove test file - small documentation - more verbose device id output Co-authored-by: Tobias Ribizel --- core/test/gtest/environments.hpp | 15 +++----- resources.json | 51 -------------------------- test/tools/CMakeLists.txt | 2 +- test/tools/resource_file_generator.cpp | 2 +- test/utils/executor.hpp | 2 + 5 files changed, 10 insertions(+), 62 deletions(-) delete mode 100644 resources.json diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index 0d433f1c9d1..6276de9372a 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -102,25 +102,22 @@ class 
ResourceEnvironment : public ::testing::Environment { if (auto rs_cuda_env = get_ctest_group("cudagpu", rank)) { auto resource = parse_ctest_resources(rs_cuda_env); cuda_device_id = resource.id; - if (rank == 0) { - std::cerr << "CUDA device " << cuda_device_id << std::endl; - } + std::cerr << "Rank " << rank << ": CUDA device " << cuda_device_id + << std::endl; } // HIP GPUs if (auto rs_hip_env = get_ctest_group("hipgpu", rank)) { auto resource = parse_ctest_resources(rs_hip_env); hip_device_id = resource.id; - if (rank == 0) { - std::cerr << "HIP device " << hip_device_id << std::endl; - } + std::cerr << "Rank " << rank << ": HIP device " << cuda_device_id + << std::endl; } // SYCL GPUs (no other devices!) if (auto rs_sycl_env = get_ctest_group("syclgpu", rank)) { auto resource = parse_ctest_resources(rs_sycl_env); sycl_device_id = resource.id; - if (rank == 0) { - std::cerr << "SYCL device " << sycl_device_id << std::endl; - } + std::cerr << "Rank " << rank << ": SYCL device " << cuda_device_id + << std::endl; } } diff --git a/resources.json b/resources.json deleted file mode 100644 index 9d69ada752b..00000000000 --- a/resources.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "version": { - "major": 1, - "minor": 0 - }, - "local": [ - { - "cpus": [ - { - "id": "0", - "slots": 32 - } - ], - - "gpus": [ - { - "id": "0", - "slots": 100 - }, - { - "id": "1", - "slots": 100 - }, - { - "id": "2", - "slots": 100 - }, - { - "id": "3", - "slots": 100 - }, - { - "id": "4", - "slots": 100 - }, - { - "id": "5", - "slots": 100 - }, - { - "id": "6", - "slots": 100 - }, - { - "id": "7", - "slots": 100 - } - ] - } - ] -} \ No newline at end of file diff --git a/test/tools/CMakeLists.txt b/test/tools/CMakeLists.txt index d3aa14b8ca7..21a7a5fc695 100644 --- a/test/tools/CMakeLists.txt +++ b/test/tools/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(resource_file_generator resource_file_generator.cpp) -target_link_libraries(resource_file_generator Ginkgo::ginkgo ) +target_link_libraries(resource_file_generator Ginkgo::ginkgo) diff --git a/test/tools/resource_file_generator.cpp b/test/tools/resource_file_generator.cpp index 1070a569662..de9464ce82d 100644 --- a/test/tools/resource_file_generator.cpp +++ b/test/tools/resource_file_generator.cpp @@ -69,4 +69,4 @@ int main() add_devices(num_sycl_gpus, "syclgpu"); std::cout << create_json(cpus + gpus) << std::endl; -} \ No newline at end of file +} diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 082c3556381..836f70d2352 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -137,6 +137,8 @@ class CommonTestFixture : public ::testing::Test { #else init_executor(ref, exec); #endif + // set device-id test-wide since some test call device + // kernels directly guard = exec->get_scoped_device_id_guard(); } From 183e01fea1cf90e9f92a30c90b257eee95cd25e4 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 2 Aug 2023 14:30:21 +0200 Subject: [PATCH 277/583] fixes tests after rebase --- core/test/gtest/environments.hpp | 3 +++ test/utils/executor.hpp | 1 + 2 files changed, 4 insertions(+) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index 6276de9372a..ff029995baf 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -5,6 +5,9 @@ #include +#include + + #ifdef GKO_COMPILING_OMP #include #endif diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 836f70d2352..419e089f793 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -35,6 +35,7 @@ OF 
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include From a9879c12c1b20dfdb3273479e8e51b8a4f7149d8 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Thu, 3 Aug 2023 07:23:10 +0000 Subject: [PATCH 278/583] Format files Co-authored-by: Marcel Koch --- core/test/gtest/environments.hpp | 32 +++++++++++++++++++ core/test/gtest/ginkgo_main.cpp | 34 +++++++++++++++++++- cuda/test/utils.hpp | 4 +-- hip/test/utils.hip.hpp | 4 +-- test/tools/resource_file_generator.cpp | 43 +++++++++++++++++++++++--- test/utils/executor.hpp | 14 +++++---- test/utils/mpi/executor.hpp | 4 ++- 7 files changed, 118 insertions(+), 17 deletions(-) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index ff029995baf..856763d4105 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -1,3 +1,35 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + #ifndef GINKGO_ENVIRONMENTS_HPP #define GINKGO_ENVIRONMENTS_HPP diff --git a/core/test/gtest/ginkgo_main.cpp b/core/test/gtest/ginkgo_main.cpp index 71117f2d73b..4d69b421875 100644 --- a/core/test/gtest/ginkgo_main.cpp +++ b/core/test/gtest/ginkgo_main.cpp @@ -1,3 +1,35 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + #include @@ -20,4 +52,4 @@ int main(int argc, char** argv) ::testing::AddGlobalTestEnvironment(new OmpEnvironment); int result = RUN_ALL_TESTS(); return result; -} \ No newline at end of file +} diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp index 0410b3a6a22..35f382806ec 100644 --- a/cuda/test/utils.hpp +++ b/cuda/test/utils.hpp @@ -54,8 +54,8 @@ class CudaTestFixture : public ::testing::Test { : ref(gko::ReferenceExecutor::create()), stream(ResourceEnvironment::cuda_device_id), exec(gko::CudaExecutor::create( - ResourceEnvironment::cuda_device_id, ref, std::make_shared< - gko::CudaAllocator>(), stream.get())), + ResourceEnvironment::cuda_device_id, ref, + std::make_shared(), stream.get())), guard(exec->get_scoped_device_id_guard()) {} diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp index 38fc3763ece..d67c8935ab4 100644 --- a/hip/test/utils.hip.hpp +++ b/hip/test/utils.hip.hpp @@ -53,8 +53,8 @@ class HipTestFixture : public ::testing::Test { HipTestFixture() : ref(gko::ReferenceExecutor::create()), stream(ResourceEnvironment::hip_device_id), - exec(gko::HipExecutor::create(ResourceEnvironment::hip_device_id, ref, std::make_shared< - gko::HipAllocator>(), + exec(gko::HipExecutor::create(ResourceEnvironment::hip_device_id, ref, + std::make_shared(), stream.get())), guard(exec->get_scoped_device_id_guard()) {} diff --git a/test/tools/resource_file_generator.cpp b/test/tools/resource_file_generator.cpp index de9464ce82d..7db262cf982 100644 --- a/test/tools/resource_file_generator.cpp +++ b/test/tools/resource_file_generator.cpp @@ -1,9 +1,42 @@ -#include +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ #include #include +#include + + std::vector split(const std::string& s, char delimiter = ',') { std::istringstream iss(s); @@ -50,17 +83,17 @@ int main() std::string gpus = ""; auto add_devices = [&](int num_devices, const std::string& name) { - if(num_devices){ + if (num_devices) { gpus.append(",\n"); gpus += '"' + name + "\": [\n"; } for (int i = 0; i < num_devices; i++) { - if(i > 0){ + if (i > 0) { gpus.append(",\n"); } - gpus+= R"( {"id": ")" + std::to_string(i) + R"(", "slots": 100})"; + gpus += R"( {"id": ")" + std::to_string(i) + R"(", "slots": 100})"; } - if(num_devices){ + if (num_devices) { gpus.append("\n]"); } }; diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 419e089f793..2a8ace8e39a 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include @@ -45,6 +44,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #include "core/test/gtest/environments.hpp" @@ -81,8 +83,8 @@ inline void init_executor(std::shared_ptr ref, throw std::runtime_error{"No suitable CUDA devices"}; } exec = gko::CudaExecutor::create( - ResourceEnvironment::cuda_device_id, - ref, std::make_shared(), stream); + ResourceEnvironment::cuda_device_id, ref, + std::make_shared(), stream); } } @@ -94,9 +96,9 @@ inline void init_executor(std::shared_ptr ref, if (gko::HipExecutor::get_num_devices() == 0) { throw std::runtime_error{"No suitable HIP devices"}; } - exec = gko::HipExecutor::create( - ResourceEnvironment::hip_device_id, ref, std::make_shared< - gko::HipAllocator>(), stream); + exec = + gko::HipExecutor::create(ResourceEnvironment::hip_device_id, ref, + std::make_shared(), stream); } diff --git a/test/utils/mpi/executor.hpp b/test/utils/mpi/executor.hpp index f02834a5a1f..504fc5d761c 100644 --- a/test/utils/mpi/executor.hpp +++ b/test/utils/mpi/executor.hpp @@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include @@ -44,6 +43,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include + + #include "test/utils/executor.hpp" From 248eaf958ee917494a1713b341bb850a007b9fd9 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 3 Aug 2023 09:57:00 +0200 Subject: [PATCH 279/583] allow using ctest resources in CI runs --- .gitlab/scripts.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index b007caff35f..15a2004bde6 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -100,7 +100,7 @@ - awk '!/^#/ { print ($2 - $1)/1000 " " $4 }' .ninja_log | sort -nr - | (( $(ctest -N | tail -1 | sed 's/Total Tests: //') != 0 )) || exit 1 - - ctest -V --timeout 6000 + - ctest --output-on-failure --timeout 6000 ${CTEST_EXTRA_ARGS} - ninja test_install - pushd test/test_install - ninja install @@ -152,7 +152,7 @@ - cd ${CI_JOB_NAME/test/build} - | (( $(ctest -N | tail -1 | sed 's/Total Tests: //') != 0 )) || exit 1 - - ctest -V --timeout 6000 + - ctest --output-on-failure --timeout 6000 ${CTEST_EXTRA_ARGS} - ninja test_install - pushd test/test_install - ninja install From 19d5b2f2478d1e75bacdc7e7590f6dbd40bc832c Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 3 Aug 2023 11:00:49 +0200 Subject: [PATCH 280/583] add query for the default number of omp threads --- core/device_hooks/omp_hooks.cpp | 4 +++ include/ginkgo/core/base/executor.hpp | 2 ++ omp/CMakeLists.txt | 1 + omp/base/executor.cpp | 52 +++++++++++++++++++++++++++ 4 files changed, 59 insertions(+) create mode 100644 omp/base/executor.cpp diff --git a/core/device_hooks/omp_hooks.cpp b/core/device_hooks/omp_hooks.cpp index f652a4d4582..f79ddfdeca6 100644 --- a/core/device_hooks/omp_hooks.cpp +++ b/core/device_hooks/omp_hooks.cpp @@ -31,6 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ #include +#include #include #include @@ -51,6 +52,9 @@ scoped_device_id_guard::scoped_device_id_guard(const OmpExecutor* exec, GKO_NOT_COMPILED(omp); +int OmpExecutor::get_num_omp_threads() { return 1; } + + } // namespace gko diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 456b69d3d7e..5f0c307bc73 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1398,6 +1398,8 @@ class OmpExecutor : public detail::ExecutorBase, return this->get_exec_info().num_pu_per_cu; } + static int get_num_omp_threads(); + scoped_device_id_guard get_scoped_device_id_guard() const override; protected: diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index c689ffc42f3..7f46feff5da 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -5,6 +5,7 @@ target_sources(ginkgo_omp PRIVATE base/batch_multi_vector_kernels.cpp base/device_matrix_data_kernels.cpp + base/executor.cpp base/index_set_kernels.cpp base/scoped_device_id.cpp base/version.cpp diff --git a/omp/base/executor.cpp b/omp/base/executor.cpp new file mode 100644 index 00000000000..3e14270ecdc --- /dev/null +++ b/omp/base/executor.cpp @@ -0,0 +1,52 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +namespace gko { + + +int OmpExecutor::get_num_omp_threads() +{ + int num_threads; +#pragma omp parallel +#pragma omp single + num_threads = omp_get_num_threads(); + return num_threads; +} + + +} // namespace gko \ No newline at end of file From a2591c59e34a03c82a6162a9dab97a68c0189432 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 3 Aug 2023 12:42:59 +0200 Subject: [PATCH 281/583] print device used in test environment Co-authored-by: Tobias Ribizel --- core/test/gtest/environments.hpp | 112 ++++++++++++++++++++++------ core/test/gtest/ginkgo_main.cpp | 7 +- core/test/gtest/ginkgo_mpi_main.cpp | 7 +- cuda/base/device.cpp | 8 ++ cuda/base/device.hpp | 4 + dpcpp/base/device.hpp | 3 + dpcpp/base/executor.dp.cpp | 11 +++ hip/base/device.hip.cpp | 8 ++ hip/base/device.hpp | 4 + 9 files changed, 136 insertions(+), 28 deletions(-) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index 856763d4105..125fa7b9b8b 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GINKGO_ENVIRONMENTS_HPP -#define GINKGO_ENVIRONMENTS_HPP +#ifndef GKO_CORE_TEST_GTEST_ENVIRONMENTS_HPP_ +#define GKO_CORE_TEST_GTEST_ENVIRONMENTS_HPP_ #include #include @@ -55,6 +55,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif +#if GKO_COMPILING_DPCPP +#include "dpcpp/base/device.hpp" +#endif + + #include #include #include @@ -111,8 +116,11 @@ class ResourceEnvironment : public ::testing::Environment { auto rs_count_env = std::getenv("CTEST_RESOURCE_GROUP_COUNT"); auto rs_count = rs_count_env ? 
std::stoi(rs_count_env) : 0; if (rs_count == 0) { - std::cerr << "Running without CTest ctest_resource configuration" - << std::endl; + if (rank == 0) { + std::cerr + << "Running without CTest ctest_resource configuration" + << std::endl; + } return; } if (rs_count != size) { @@ -121,38 +129,25 @@ class ResourceEnvironment : public ::testing::Environment { } // parse CTest ctest_resource group descriptions - if (rank == 0) { - std::cerr << "Running with CTest ctest_resource configuration:" - << std::endl; - } // OpenMP CPU threads if (auto rs_omp_env = get_ctest_group("cpu", rank)) { auto resource = parse_ctest_resources(rs_omp_env); omp_threads = resource.slots; - if (rank == 0) { - std::cerr << omp_threads << " CPU threads" << std::endl; - } } // CUDA GPUs if (auto rs_cuda_env = get_ctest_group("cudagpu", rank)) { auto resource = parse_ctest_resources(rs_cuda_env); cuda_device_id = resource.id; - std::cerr << "Rank " << rank << ": CUDA device " << cuda_device_id - << std::endl; } // HIP GPUs if (auto rs_hip_env = get_ctest_group("hipgpu", rank)) { auto resource = parse_ctest_resources(rs_hip_env); hip_device_id = resource.id; - std::cerr << "Rank " << rank << ": HIP device " << cuda_device_id - << std::endl; } // SYCL GPUs (no other devices!) if (auto rs_sycl_env = get_ctest_group("syclgpu", rank)) { auto resource = parse_ctest_resources(rs_sycl_env); sycl_device_id = resource.id; - std::cerr << "Rank " << rank << ": SYCL device " << cuda_device_id - << std::endl; } } @@ -167,18 +162,31 @@ class ResourceEnvironment : public ::testing::Environment { class OmpEnvironment : public ::testing::Environment { public: + explicit OmpEnvironment(int rank) : rank_(rank) {} + void SetUp() override { if (ResourceEnvironment::omp_threads > 0) { - omp_set_num_threads(ResourceEnvironment::omp_threads); + omp_set_num_threads(num_threads); } +#pragma omp parallel +#pragma single + std::cerr << "Rank " << rank_ << ": OMP threads " + << omp_get_num_threads(); + << std::endl; } + +private: + int rank_; }; #else -class OmpEnvironment : public ::testing::Environment {}; +class OmpEnvironment : public ::testing::Environment { +public: + explicit OmpEnvironment(int){}; +}; #endif @@ -187,15 +195,31 @@ class OmpEnvironment : public ::testing::Environment {}; class CudaEnvironment : public ::testing::Environment { public: + explicit CudaEnvironment(int rank) : rank_(rank) {} + + void SetUp() override + { + auto device_id = ResourceEnvironment::cuda_device_id; + std::cerr << "Rank " << rank_ << ": CUDA device " + << gko::kernels::cuda::get_device_name(device_id) << " ID " + << device_id << std::endl; + } + void TearDown() override { gko::kernels::cuda::reset_device(ResourceEnvironment::cuda_device_id); } + +private: + int rank_; }; #else -class CudaEnvironment : public ::testing::Environment {}; +class CudaEnvironment : public ::testing::Environment { +public: + explicit CudaEnvironment(int){}; +}; #endif @@ -204,17 +228,61 @@ class CudaEnvironment : public ::testing::Environment {}; class HipEnvironment : public ::testing::Environment { public: + explicit HipEnvironment(int rank) : rank_(rank) {} + + void SetUp() override + { + auto device_id = ResourceEnvironment::hip_device_id; + std::cerr << "Rank " << rank_ << ": HIP device " + << gko::kernels::hip::get_device_name(device_id) << " ID " + << device_id << std::endl; + } + void TearDown() override { gko::kernels::hip::reset_device(ResourceEnvironment::hip_device_id); } + +private: + int rank_; +}; + +#else + +class HipEnvironment : public ::testing::Environment { 
+public: + explicit HipEnvironment(int){}; +}; + +#endif + + +#ifdef GKO_COMPILING_DPCPP + +class SyclEnvironment : public ::testing::Environment { +public: + explicit SyclEnvironment(int rank) : rank_(rank) {} + + void SetUp() override + { + auto device_id = ResourceEnvironment::sycl_device_id; + std::cerr << "Rank " << rank_ << ": SYCL device " + << gko::kernels::dpcpp::get_device_name(device_id) << " ID " + << device_id << std::endl; + } + +private: + int rank_; }; #else -class HipEnvironment : public ::testing::Environment {}; +class SyclEnvironment : public ::testing::Environment { +public: + explicit SyclEnvironment(int){}; +}; #endif -#endif // GINKGO_ENVIRONMENTS_HPP +#endif // GKO_CORE_TEST_GTEST_ENVIRONMENTS_HPP_ diff --git a/core/test/gtest/ginkgo_main.cpp b/core/test/gtest/ginkgo_main.cpp index 4d69b421875..01d1fc393c3 100644 --- a/core/test/gtest/ginkgo_main.cpp +++ b/core/test/gtest/ginkgo_main.cpp @@ -47,9 +47,10 @@ int main(int argc, char** argv) ::testing::InitGoogleTest(&argc, argv); ::testing::AddGlobalTestEnvironment(new ResourceEnvironment); - ::testing::AddGlobalTestEnvironment(new CudaEnvironment); - ::testing::AddGlobalTestEnvironment(new HipEnvironment); - ::testing::AddGlobalTestEnvironment(new OmpEnvironment); + ::testing::AddGlobalTestEnvironment(new CudaEnvironment(0)); + ::testing::AddGlobalTestEnvironment(new HipEnvironment(0)); + ::testing::AddGlobalTestEnvironment(new SyclEnvironment(0)); + ::testing::AddGlobalTestEnvironment(new OmpEnvironment(0)); int result = RUN_ALL_TESTS(); return result; } diff --git a/core/test/gtest/ginkgo_mpi_main.cpp b/core/test/gtest/ginkgo_mpi_main.cpp index 945ec7ec7cd..f7fe71981d2 100644 --- a/core/test/gtest/ginkgo_mpi_main.cpp +++ b/core/test/gtest/ginkgo_mpi_main.cpp @@ -394,9 +394,10 @@ int main(int argc, char** argv) testing::AddGlobalTestEnvironment(new GTestMPIListener::MPIEnvironment); ::testing::AddGlobalTestEnvironment(new ResourceEnvironment(rank, size)); - ::testing::AddGlobalTestEnvironment(new CudaEnvironment); - ::testing::AddGlobalTestEnvironment(new HipEnvironment); - ::testing::AddGlobalTestEnvironment(new OmpEnvironment); + ::testing::AddGlobalTestEnvironment(new CudaEnvironment(rank)); + ::testing::AddGlobalTestEnvironment(new HipEnvironment(rank)); + ::testing::AddGlobalTestEnvironment(new SyclEnvironment(rank)); + ::testing::AddGlobalTestEnvironment(new OmpEnvironment(rank)); ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); diff --git a/cuda/base/device.cpp b/cuda/base/device.cpp index 2db0876ca95..32cf6265160 100644 --- a/cuda/base/device.cpp +++ b/cuda/base/device.cpp @@ -58,6 +58,14 @@ void destroy_event(CUevent_st* event) } +std::string get_device_name(int device_id) +{ + cudaDeviceProp prop; + GKO_ASSERT_NO_CUDA_ERRORS(cudaGetDeviceProperties(&prop, device_id)); + return {prop.name}; +} + + } // namespace cuda } // namespace kernels } // namespace gko diff --git a/cuda/base/device.hpp b/cuda/base/device.hpp index 7bd9390c54e..e363f455300 100644 --- a/cuda/base/device.hpp +++ b/cuda/base/device.hpp @@ -50,6 +50,10 @@ void reset_device(int device_id); void destroy_event(CUevent_st* event); +/** returns cudaDeviceProp.name for the given device */ +std::string get_device_name(int device_id); + + } // namespace cuda } // namespace kernels } // namespace gko diff --git a/dpcpp/base/device.hpp b/dpcpp/base/device.hpp index 6047fbed615..658ccbe18f4 100644 --- a/dpcpp/base/device.hpp +++ b/dpcpp/base/device.hpp @@ -46,6 +46,9 @@ namespace dpcpp { void 
destroy_event(sycl::event* event); +std::string get_device_name(int device_id); + + } // namespace dpcpp } // namespace kernels } // namespace gko diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index 3d01e271f15..6d6bbbe0388 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -323,6 +323,17 @@ namespace dpcpp { void destroy_event(sycl::event* event) { delete event; } +std::string get_device_name(int device_id) +{ + auto devices = ::gko::detail::get_devices("gpu"); + if (devices.empty()) { + return "CPU"; + } + + return devices[device_id].get_info(); +} + + } // namespace dpcpp } // namespace kernels } // namespace gko diff --git a/hip/base/device.hip.cpp b/hip/base/device.hip.cpp index 9a01d6aacee..d539fa69b43 100644 --- a/hip/base/device.hip.cpp +++ b/hip/base/device.hip.cpp @@ -62,6 +62,14 @@ void destroy_event(GKO_HIP_EVENT_STRUCT* event) } +std::string get_device_name(int device_id) +{ + hipDeviceProp_t prop; + GKO_ASSERT_NO_HIP_ERRORS(hipGetDeviceProperties(&prop, device_id)); + return {prop.name}; +} + + } // namespace hip } // namespace kernels } // namespace gko diff --git a/hip/base/device.hpp b/hip/base/device.hpp index dcc8c3ba0f1..fceffe4a503 100644 --- a/hip/base/device.hpp +++ b/hip/base/device.hpp @@ -49,6 +49,10 @@ void reset_device(int device_id); void destroy_event(GKO_HIP_EVENT_STRUCT* event); +/** returns hipDeviceProp.name for the given device */ +std::string get_device_name(int device_id); + + } // namespace hip } // namespace kernels } // namespace gko From f5d7209a5bbee6b35ecfe03fde71807426c5ae46 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 3 Aug 2023 12:44:21 +0200 Subject: [PATCH 282/583] use ginkgo_create_dpcpp_test consistently --- dpcpp/test/matrix/CMakeLists.txt | 2 +- dpcpp/test/matrix/{fbcsr_kernels.cpp => fbcsr_kernels.dp.cpp} | 0 dpcpp/test/preconditioner/CMakeLists.txt | 2 +- .../{jacobi_kernels.cpp => jacobi_kernels.dp.cpp} | 0 4 files changed, 2 insertions(+), 2 deletions(-) rename dpcpp/test/matrix/{fbcsr_kernels.cpp => fbcsr_kernels.dp.cpp} (100%) rename dpcpp/test/preconditioner/{jacobi_kernels.cpp => jacobi_kernels.dp.cpp} (100%) diff --git a/dpcpp/test/matrix/CMakeLists.txt b/dpcpp/test/matrix/CMakeLists.txt index 88ab52e9c3f..7ada04882da 100644 --- a/dpcpp/test/matrix/CMakeLists.txt +++ b/dpcpp/test/matrix/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_test(fbcsr_kernels) +ginkgo_create_dpcpp_test(fbcsr_kernels) diff --git a/dpcpp/test/matrix/fbcsr_kernels.cpp b/dpcpp/test/matrix/fbcsr_kernels.dp.cpp similarity index 100% rename from dpcpp/test/matrix/fbcsr_kernels.cpp rename to dpcpp/test/matrix/fbcsr_kernels.dp.cpp diff --git a/dpcpp/test/preconditioner/CMakeLists.txt b/dpcpp/test/preconditioner/CMakeLists.txt index a0ca5a2e38a..c606e12ac3e 100644 --- a/dpcpp/test/preconditioner/CMakeLists.txt +++ b/dpcpp/test/preconditioner/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_test(jacobi_kernels) +ginkgo_create_dpcpp_test(jacobi_kernels) diff --git a/dpcpp/test/preconditioner/jacobi_kernels.cpp b/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp similarity index 100% rename from dpcpp/test/preconditioner/jacobi_kernels.cpp rename to dpcpp/test/preconditioner/jacobi_kernels.dp.cpp From d00494b338d4f24f2c0a487ffb2aa78544bd6a21 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 3 Aug 2023 14:21:18 +0200 Subject: [PATCH 283/583] fixup! 
print device used in test environment --- core/test/gtest/environments.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index 125fa7b9b8b..2d0d1eac33a 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -167,13 +167,12 @@ class OmpEnvironment : public ::testing::Environment { void SetUp() override { if (ResourceEnvironment::omp_threads > 0) { - omp_set_num_threads(num_threads); + omp_set_num_threads(ResourceEnvironment::omp_threads); } #pragma omp parallel #pragma single std::cerr << "Rank " << rank_ << ": OMP threads " - << omp_get_num_threads(); - << std::endl; + << omp_get_num_threads() << std::endl; } private: From 14e84670bfab913e5bad07a1ee7e93285f765fc4 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Thu, 3 Aug 2023 17:45:28 +0200 Subject: [PATCH 284/583] review updates: - cmake documentation - take omp num thread into account for resource file Co-authored-by: Yu-Hsiang M. Tsai --- cmake/create_test.cmake | 5 +++-- test/tools/resource_file_generator.cpp | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 1d3e041ff2a..2a905570a4b 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -110,8 +110,9 @@ endfunction() ## Adds a test to the list executed by ctest and sets its output binary name ## Possible additional arguments: ## - `MPI_SIZE size` causes the tests to be run with `size` MPI processes. -## - `CORES` the number of threads used by a test, default is 4 -## - `PERCENTAGE` usage percentage of a single GPU, default is 50 +## - `RESOURCE_LOCAL_CORES` the number of threads used by a test, default is 4 +## - `RESOURCE_PERCENTAGE` usage percentage of a single GPU, default is 25 +## - `RESOURCE_TYPE` the resource type, can be ref, cpu, cudagpu, hipgpu, syclgpu ## - `DISABLE_EXECUTORS exec1 exec2` disables the test for certain backends (if built for multiple) ## - `ADDITIONAL_LIBRARIES lib1 lib2` adds additional target link dependencies ## - `ADDITIONAL_INCLUDES path1 path2` adds additional target include paths diff --git a/test/tools/resource_file_generator.cpp b/test/tools/resource_file_generator.cpp index 7db262cf982..a2b0b9bd5cd 100644 --- a/test/tools/resource_file_generator.cpp +++ b/test/tools/resource_file_generator.cpp @@ -48,6 +48,7 @@ std::vector split(const std::string& s, char delimiter = ',') return tokens; } + std::string create_json(const std::string& resources) { std::string json; @@ -73,7 +74,7 @@ std::string create_json(const std::string& resources) int main() { - auto num_cpu_threads = std::max(std::thread::hardware_concurrency(), 1u); + auto num_cpu_threads = gko::OmpExecutor::get_num_omp_threads(); auto num_cuda_gpus = gko::CudaExecutor::get_num_devices(); auto num_hip_gpus = gko::HipExecutor::get_num_devices(); auto num_sycl_gpus = gko::DpcppExecutor::get_num_devices("gpu"); From 70504d08ea53eaa9445d45e4a841c3a5d37575e1 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 26 Aug 2023 22:50:04 +0200 Subject: [PATCH 285/583] use a single SYCL CPU as fall-back --- CMakeLists.txt | 6 ++++++ cmake/create_test.cmake | 17 ++++++++++------- include/ginkgo/core/base/executor.hpp | 2 +- .../core/base/{fwd_defs.hpp => fwd_decls.hpp} | 6 +++--- include/ginkgo/core/base/memory.hpp | 2 +- include/ginkgo/ginkgo.hpp | 2 +- test/tools/resource_file_generator.cpp | 3 ++- 7 files changed, 24 insertions(+), 14 deletions(-) rename 
include/ginkgo/core/base/{fwd_defs.hpp => fwd_decls.hpp} (94%) diff --git a/CMakeLists.txt b/CMakeLists.txt index fab64e43c76..bec31a4360c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,8 @@ option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) option(GINKGO_INSTALL_RPATH_DEPENDENCIES "Add dependencies to the installation RPATH." OFF) option(GINKGO_FORCE_GPU_AWARE_MPI "Assert that the MPI library is GPU aware. This forces Ginkgo to assume that GPU aware functionality is available (OFF (default) or ON), but may fail catastrophically in case the MPI implementation is not GPU Aware, and GPU aware functionality has been forced" OFF) +set(GINKGO_TEST_OMP_PARALLELISM "4" CACHE STRING + "The number of OpenMP threads to use for a test binary during CTest resource file-constrained test.") # load executor-specific configuration if(GINKGO_BUILD_CUDA) @@ -307,6 +309,10 @@ configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in # propagated to the other parts of Ginkgo in case of building as static libraries add_subdirectory(devices) # Basic device functionalities. Always compiled. add_subdirectory(common) # Import list of unified kernel source files +if(GINKGO_BUILD_TESTS) + # use custom target `tests` to build only test binaries + add_custom_target(tests) +endif() if(GINKGO_BUILD_CUDA) add_subdirectory(cuda) # High-performance kernels for NVIDIA GPUs endif() diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 2a905570a4b..f11657ec324 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -73,14 +73,14 @@ function(ginkgo_add_resource_requirement test_name) set(single_resource "cpu:1") elseif(add_rr_RESOURCE_TYPE STREQUAL "cpu") if(NOT add_rr_RESOURCE_LOCAL_CORES) - set(add_rr_RESOURCE_LOCAL_CORES 4) # perhaps get this from environment variable? + set(add_rr_RESOURCE_LOCAL_CORES ${GINKGO_TEST_OMP_PARALLELISM}) endif() if(NOT add_rr_RESOURCE_LOCAL_CORES MATCHES "^[0-9]+") message(FATAL_ERROR "Resource specification is invalid: RESOURCE_LOCAL_CORES=${add_rr_RESOURCE_LOCAL_CORES}") endif() set(single_resource "cpu:${add_rr_RESOURCE_LOCAL_CORES}") - elseif(add_rr_RESOURCE_TYPE MATCHES "^(cuda|hip|sycl)gpu$") + elseif(add_rr_RESOURCE_TYPE MATCHES "^(cudagpu|hipgpu|sycl)$") if(NOT add_rr_RESOURCE_PERCENTAGE) set(add_rr_RESOURCE_PERCENTAGE 25) endif() @@ -95,7 +95,7 @@ function(ginkgo_add_resource_requirement test_name) set(single_resource "${add_rr_RESOURCE_TYPE}:${add_rr_RESOURCE_PERCENTAGE}") else() - message(FATAL_ERROR "Unrecognized resource type ${add_rr_RESOURCE_TYPE}, allowed are: ref, cpu, cudagpu, hipgpu, syclgpu.") + message(FATAL_ERROR "Unrecognized resource type ${add_rr_RESOURCE_TYPE}, allowed are: ref, cpu, cudagpu, hipgpu, sycl.") endif() if(NOT add_rr_MPI_SIZE) @@ -110,9 +110,10 @@ endfunction() ## Adds a test to the list executed by ctest and sets its output binary name ## Possible additional arguments: ## - `MPI_SIZE size` causes the tests to be run with `size` MPI processes. 
-## - `RESOURCE_LOCAL_CORES` the number of threads used by a test, default is 4 +## - `RESOURCE_LOCAL_CORES` the number of threads used by a test, default is +## $GINKGO_TEST_OMP_PARALLELISM ## - `RESOURCE_PERCENTAGE` usage percentage of a single GPU, default is 25 -## - `RESOURCE_TYPE` the resource type, can be ref, cpu, cudagpu, hipgpu, syclgpu +## - `RESOURCE_TYPE` the resource type, can be ref, cpu, cudagpu, hipgpu, sycl ## - `DISABLE_EXECUTORS exec1 exec2` disables the test for certain backends (if built for multiple) ## - `ADDITIONAL_LIBRARIES lib1 lib2` adds additional target link dependencies ## - `ADDITIONAL_INCLUDES path1 path2` adds additional target include paths @@ -133,6 +134,8 @@ function(ginkgo_add_test test_name test_target_name) COMMAND ${test_target_name} WORKING_DIRECTORY "$") endif() + # use custom target `tests` to build only test binaries + add_dependencies(tests ${test_target_name}) ginkgo_add_resource_requirement(${REL_BINARY_DIR}/${test_name} ${ARGN}) @@ -165,7 +168,7 @@ function(ginkgo_create_dpcpp_test test_name) target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS}) target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE syclgpu) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE sycl) # Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test. if (MKL_ENV) set_tests_properties(${test_target_name} PROPERTIES ENVIRONMENT "${MKL_ENV}") @@ -304,7 +307,7 @@ function(ginkgo_create_common_test_internal test_name exec_type exec) elseif (exec STREQUAL hip) set(test_resource_type hipgpu) else () - set(test_resource_type syclgpu) + set(test_resource_type sycl) endif () ginkgo_build_test_name(${test_name} test_target_name) string(TOUPPER ${exec} exec_upper) diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 5f0c307bc73..c7195501178 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -47,7 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include +#include #include #include #include diff --git a/include/ginkgo/core/base/fwd_defs.hpp b/include/ginkgo/core/base/fwd_decls.hpp similarity index 94% rename from include/ginkgo/core/base/fwd_defs.hpp rename to include/ginkgo/core/base/fwd_decls.hpp index 5f0cbd9d960..f99d3a0f90e 100644 --- a/include/ginkgo/core/base/fwd_defs.hpp +++ b/include/ginkgo/core/base/fwd_decls.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_ -#define GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_ +#ifndef GKO_PUBLIC_CORE_BASE_FWD_DECLS_HPP_ +#define GKO_PUBLIC_CORE_BASE_FWD_DECLS_HPP_ #include @@ -87,4 +87,4 @@ class event; #endif -#endif // GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_ +#endif // GKO_PUBLIC_CORE_BASE_FWD_DECLS_HPP_ diff --git a/include/ginkgo/core/base/memory.hpp b/include/ginkgo/core/base/memory.hpp index f421abf7da4..6997b6351e5 100644 --- a/include/ginkgo/core/base/memory.hpp +++ b/include/ginkgo/core/base/memory.hpp @@ -34,7 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GKO_PUBLIC_CORE_BASE_MEMORY_HPP_ -#include +#include #include diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 594ad880b8c..bcdaa5d2d20 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -50,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include +#include #include #include #include diff --git a/test/tools/resource_file_generator.cpp b/test/tools/resource_file_generator.cpp index a2b0b9bd5cd..f0dbbea0353 100644 --- a/test/tools/resource_file_generator.cpp +++ b/test/tools/resource_file_generator.cpp @@ -100,7 +100,8 @@ int main() }; add_devices(num_cuda_gpus, "cudagpu"); add_devices(num_hip_gpus, "hipgpu"); - add_devices(num_sycl_gpus, "syclgpu"); + // SYCL GPUs, fall back to CPU + add_devices(std::max(1, num_sycl_gpus), "sycl"); std::cout << create_json(cpus + gpus) << std::endl; } From 681b53d2fd658f3b31a5e927a189529013f6c383 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 26 Aug 2023 22:56:33 +0200 Subject: [PATCH 286/583] fix OMP environment --- core/test/gtest/environments.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index 2d0d1eac33a..815902d71a8 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -170,7 +170,7 @@ class OmpEnvironment : public ::testing::Environment { omp_set_num_threads(ResourceEnvironment::omp_threads); } #pragma omp parallel -#pragma single +#pragma omp single std::cerr << "Rank " << rank_ << ": OMP threads " << omp_get_num_threads() << std::endl; } From 1c627a977a8066dc5465649cdabaaf15ef092577 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 31 Aug 2023 11:10:39 +0200 Subject: [PATCH 287/583] create separate GTest main libraries --- cmake/create_test.cmake | 41 ++--- core/test/CMakeLists.txt | 2 + core/test/gtest/CMakeLists.txt | 30 ++++ core/test/gtest/environments.hpp | 231 +++++----------------------- core/test/gtest/ginkgo_main.cpp | 5 +- core/test/gtest/ginkgo_mpi_main.cpp | 7 +- core/test/gtest/resources.cpp | 145 +++++++++++++++++ core/test/gtest/resources.hpp | 51 ++++++ cuda/test/utils.hpp | 2 +- hip/test/utils.hip.hpp | 2 +- test/utils/executor.hpp | 2 +- 11 files changed, 285 insertions(+), 233 deletions(-) create mode 100644 core/test/gtest/CMakeLists.txt create mode 100644 core/test/gtest/resources.cpp create mode 100644 core/test/gtest/resources.hpp diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index f11657ec324..baaf84f59eb 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -11,26 +11,9 @@ function(ginkgo_build_test_name test_name target_name) set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE) endfunction() -function(ginkgo_create_gtest_main) - add_library(ginkgo_gtest_main "") - target_sources(ginkgo_gtest_main - PRIVATE - ${PROJECT_SOURCE_DIR}/core/test/gtest/ginkgo_main.cpp) - target_link_libraries(ginkgo_gtest_main PRIVATE GTest::GTest Ginkgo::ginkgo) -endfunction() - -function(ginkgo_create_gtest_mpi_main) - add_library(ginkgo_gtest_mpi_main "") - target_sources(ginkgo_gtest_mpi_main - PRIVATE - ${PROJECT_SOURCE_DIR}/core/test/gtest/ginkgo_mpi_main.cpp) - find_package(MPI 3.1 COMPONENTS CXX REQUIRED) - target_link_libraries(ginkgo_gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX Ginkgo::ginkgo) -endfunction() - ## Set up shared target properties and handle ADDITIONAL_LIBRARIES/ADDITIONAL_INCLUDES ## `MPI_SIZE size` causes the tests to be run with 
`size` MPI processes. -function(ginkgo_set_test_target_properties test_target_name) +function(ginkgo_set_test_target_properties test_target_name test_library_suffix) cmake_parse_arguments(PARSE_ARGV 1 set_properties "" "${gko_test_single_args}" "${gko_test_multi_args}") if (GINKGO_FAST_TESTS) target_compile_definitions(${test_target_name} PRIVATE GINKGO_FAST_TESTS) @@ -42,16 +25,12 @@ function(ginkgo_set_test_target_properties test_target_name) target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) endif() if(GINKGO_CHECK_CIRCULAR_DEPS) - target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") + target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}") endif() if(set_properties_MPI_SIZE) - target_sources(${test_target_name} - PRIVATE - ${PROJECT_SOURCE_DIR}/core/test/gtest/ginkgo_mpi_main.cpp) + target_link_libraries(${test_target_name} PRIVATE ginkgo_gtest_main_mpi${test_library_suffix}) else() - target_sources(${test_target_name} - PRIVATE - ${PROJECT_SOURCE_DIR}/core/test/gtest/ginkgo_main.cpp) + target_link_libraries(${test_target_name} PRIVATE ginkgo_gtest_main${test_library_suffix}) endif() target_compile_features(${test_target_name} PUBLIC cxx_std_14) target_compile_options(${test_target_name} PRIVATE $<$:${GINKGO_COMPILER_FLAGS}>) @@ -156,7 +135,7 @@ function(ginkgo_create_test test_name) ginkgo_build_test_name(${test_name} test_target_name) add_executable(${test_target_name} ${test_name}.cpp) target_link_libraries(${test_target_name}) - ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) + ginkgo_set_test_target_properties(${test_target_name} "" ${ARGN}) ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE ref) endfunction(ginkgo_create_test) @@ -167,7 +146,7 @@ function(ginkgo_create_dpcpp_test test_name) target_compile_features(${test_target_name} PUBLIC cxx_std_17) target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS}) target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) - ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) + ginkgo_set_test_target_properties(${test_target_name} "_dpcpp" ${ARGN}) ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE sycl) # Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test. 
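A hedged usage sketch (test names invented; behaviour inferred from the option documentation and ginkgo_add_resource_requirement above): callers forward the resource options through these helpers, for example

    ginkgo_create_test(my_core_kernels)
    ginkgo_create_test(my_distributed_kernels MPI_SIZE 3)
    ginkgo_create_dpcpp_test(my_dpcpp_kernels RESOURCE_PERCENTAGE 50)

which, when a CTest resource spec file is in use, would request roughly the resources "cpu:1" for the reference test and "sycl:50" for the DPC++ test.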
if (MKL_ENV) @@ -201,7 +180,7 @@ function(ginkgo_create_cuda_test_internal test_name filename test_target_name) if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF) endif() - ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) + ginkgo_set_test_target_properties(${test_target_name} "_cuda" ${ARGN}) ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE cudagpu) endfunction(ginkgo_create_cuda_test_internal) @@ -257,7 +236,7 @@ function(ginkgo_create_hip_test_internal test_name filename test_target_name add ${hiprand_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS} ) - ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) + ginkgo_set_test_target_properties(${test_target_name} "_hip" ${ARGN}) ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE hipgpu) endfunction(ginkgo_create_hip_test_internal) @@ -273,7 +252,7 @@ function(ginkgo_create_omp_test_internal test_name filename test_target_name) add_executable(${test_target_name} ${test_name}.cpp) target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_OMP) target_link_libraries(${test_target_name} PRIVATE OpenMP::OpenMP_CXX) - ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) + ginkgo_set_test_target_properties(${test_target_name} "_omp" ${ARGN}) ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE cpu) endfunction() @@ -328,7 +307,7 @@ function(ginkgo_create_common_test_internal test_name exec_type exec) target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1) target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1) endif() - ginkgo_set_test_target_properties(${test_target_name} ${ARGN}) + ginkgo_set_test_target_properties(${test_target_name} "_${exec}" ${ARGN}) ginkgo_add_test(${test_name}_${exec} ${test_target_name} ${ARGN} RESOURCE_TYPE ${test_resource_type}) endfunction(ginkgo_create_common_test_internal) diff --git a/core/test/CMakeLists.txt b/core/test/CMakeLists.txt index b330a493b38..776d0b72c7d 100644 --- a/core/test/CMakeLists.txt +++ b/core/test/CMakeLists.txt @@ -1,5 +1,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) +add_subdirectory(gtest) + add_subdirectory(accessor) add_subdirectory(base) add_subdirectory(components) diff --git a/core/test/gtest/CMakeLists.txt b/core/test/gtest/CMakeLists.txt new file mode 100644 index 00000000000..43bb6863224 --- /dev/null +++ b/core/test/gtest/CMakeLists.txt @@ -0,0 +1,30 @@ +function(add_gtest_main suffix definitions) + add_library(ginkgo_gtest_main${suffix} ginkgo_main.cpp resources.cpp) + target_link_libraries(ginkgo_gtest_main${suffix} PUBLIC Ginkgo::ginkgo GTest::GTest) + target_compile_definitions(ginkgo_gtest_main${suffix} PRIVATE ${definitions}) + ginkgo_compile_features(ginkgo_gtest_main${suffix}) + if (GINKGO_BUILD_MPI) + add_library(ginkgo_gtest_main_mpi${suffix} ginkgo_mpi_main.cpp resources.cpp) + target_link_libraries(ginkgo_gtest_main_mpi${suffix} PUBLIC Ginkgo::ginkgo GTest::GTest MPI::MPI_CXX) + target_compile_definitions(ginkgo_gtest_main_mpi${suffix} PRIVATE ${definitions}) + ginkgo_compile_features(ginkgo_gtest_main_mpi${suffix}) + endif() +endfunction() + +add_gtest_main("" "") +add_library(ginkgo_gtest_main_reference ALIAS ginkgo_gtest_main) +if (GINKGO_BUILD_MPI) + add_library(ginkgo_gtest_main_mpi_reference ALIAS ginkgo_gtest_main_mpi) +endif() +if (GINKGO_BUILD_OMP) + add_gtest_main("_omp" "GKO_COMPILING_OMP") +endif() +if (GINKGO_BUILD_CUDA) + 
add_gtest_main("_cuda" "GKO_COMPILING_CUDA") +endif() +if (GINKGO_BUILD_HIP) + add_gtest_main("_hip" "GKO_COMPILING_HIP") +endif() +if (GINKGO_BUILD_DPCPP) + add_gtest_main("_dpcpp" "GKO_COMPILING_DPCPP") +endif() diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index 815902d71a8..89166a0594c 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -35,11 +35,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include +#include +#include +#include + + +#include "core/test/gtest/resources.hpp" + + #ifdef GKO_COMPILING_OMP #include #endif @@ -60,228 +69,68 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif -#include -#include -#include - - -struct ctest_resource { - int id; - int slots; -}; - - -inline char* get_ctest_group(std::string resource_type, int group_id) -{ - std::transform(resource_type.begin(), resource_type.end(), - resource_type.begin(), - [](auto c) { return std::toupper(c); }); - std::string rs_group_env = "CTEST_RESOURCE_GROUP_" + - std::to_string(group_id) + "_" + resource_type; - return std::getenv(rs_group_env.c_str()); -} - - -inline ctest_resource parse_ctest_resources(std::string resource) -{ - std::regex re(R"(id\:(\d+),slots\:(\d+))"); - std::smatch match; - - if (!std::regex_match(resource, match, re)) { - GKO_INVALID_STATE("Can't parse ctest_resource string: " + resource); - } - - return ctest_resource{std::stoi(match[1]), std::stoi(match[2])}; -} - - -class ResourceEnvironment : public ::testing::Environment { +class DeviceEnvironment : public ::testing::Environment { public: - explicit ResourceEnvironment(int rank = 0, int size = 1) - { -#if GINKGO_BUILD_MPI - if (size > 1) { - cuda_device_id = gko::experimental::mpi::map_rank_to_device_id( - MPI_COMM_WORLD, - std::max(gko::CudaExecutor::get_num_devices(), 1)); - hip_device_id = gko::experimental::mpi::map_rank_to_device_id( - MPI_COMM_WORLD, - std::max(gko::HipExecutor::get_num_devices(), 1)); - sycl_device_id = gko::experimental::mpi::map_rank_to_device_id( - MPI_COMM_WORLD, - std::max(gko::DpcppExecutor::get_num_devices("gpu"), 1)); - } -#endif - - auto rs_count_env = std::getenv("CTEST_RESOURCE_GROUP_COUNT"); - auto rs_count = rs_count_env ? std::stoi(rs_count_env) : 0; - if (rs_count == 0) { - if (rank == 0) { - std::cerr - << "Running without CTest ctest_resource configuration" - << std::endl; - } - return; - } - if (rs_count != size) { - GKO_INVALID_STATE("Invalid resource group count: " + - std::to_string(rs_count)); - } - - // parse CTest ctest_resource group descriptions - // OpenMP CPU threads - if (auto rs_omp_env = get_ctest_group("cpu", rank)) { - auto resource = parse_ctest_resources(rs_omp_env); - omp_threads = resource.slots; - } - // CUDA GPUs - if (auto rs_cuda_env = get_ctest_group("cudagpu", rank)) { - auto resource = parse_ctest_resources(rs_cuda_env); - cuda_device_id = resource.id; - } - // HIP GPUs - if (auto rs_hip_env = get_ctest_group("hipgpu", rank)) { - auto resource = parse_ctest_resources(rs_hip_env); - hip_device_id = resource.id; - } - // SYCL GPUs (no other devices!) 
- if (auto rs_sycl_env = get_ctest_group("syclgpu", rank)) { - auto resource = parse_ctest_resources(rs_sycl_env); - sycl_device_id = resource.id; - } - } - - static int omp_threads; - static int cuda_device_id; - static int hip_device_id; - static int sycl_device_id; -}; - + explicit DeviceEnvironment(int rank) : rank_(rank) { print_environment(); } #ifdef GKO_COMPILING_OMP - -class OmpEnvironment : public ::testing::Environment { -public: - explicit OmpEnvironment(int rank) : rank_(rank) {} - - void SetUp() override + void print_environment() const { if (ResourceEnvironment::omp_threads > 0) { omp_set_num_threads(ResourceEnvironment::omp_threads); } -#pragma omp parallel -#pragma omp single - std::cerr << "Rank " << rank_ << ": OMP threads " - << omp_get_num_threads() << std::endl; + std::stringstream ss; + ss << "Rank " << rank_ << ": OMP threads " << omp_get_max_threads() + << std::endl; + std::cerr << ss.str(); } - -private: - int rank_; -}; - -#else - - -class OmpEnvironment : public ::testing::Environment { -public: - explicit OmpEnvironment(int){}; -}; - -#endif - - -#ifdef GKO_COMPILING_CUDA - -class CudaEnvironment : public ::testing::Environment { -public: - explicit CudaEnvironment(int rank) : rank_(rank) {} - - void SetUp() override +#elif defined(GKO_COMPILING_CUDA) + void print_environment() const { auto device_id = ResourceEnvironment::cuda_device_id; - std::cerr << "Rank " << rank_ << ": CUDA device " - << gko::kernels::cuda::get_device_name(device_id) << " ID " - << device_id << std::endl; + std::stringstream ss; + ss << "Rank " << rank_ << ": CUDA device " + << gko::kernels::cuda::get_device_name(device_id) << " ID " + << device_id << std::endl; + std::cerr << ss.str(); } void TearDown() override { gko::kernels::cuda::reset_device(ResourceEnvironment::cuda_device_id); } - -private: - int rank_; -}; - -#else - -class CudaEnvironment : public ::testing::Environment { -public: - explicit CudaEnvironment(int){}; -}; - -#endif - - -#ifdef GKO_COMPILING_HIP - -class HipEnvironment : public ::testing::Environment { -public: - explicit HipEnvironment(int rank) : rank_(rank) {} - - void SetUp() override +#elif defined(GKO_COMPILING_HIP) + void print_environment() const { auto device_id = ResourceEnvironment::hip_device_id; - std::cerr << "Rank " << rank_ << ": HIP device " - << gko::kernels::hip::get_device_name(device_id) << " ID " - << device_id << std::endl; + std::stringstream ss; + ss << "Rank " << rank_ << ": HIP device " + << gko::kernels::hip::get_device_name(device_id) << " ID " + << device_id << std::endl; + std::cerr << ss.str(); } void TearDown() override { gko::kernels::hip::reset_device(ResourceEnvironment::hip_device_id); } - -private: - int rank_; -}; - -#else - -class HipEnvironment : public ::testing::Environment { -public: - explicit HipEnvironment(int){}; -}; - -#endif - - -#ifdef GKO_COMPILING_DPCPP - -class SyclEnvironment : public ::testing::Environment { -public: - explicit SyclEnvironment(int rank) : rank_(rank) {} - - void SetUp() override +#elif defined(GKO_COMPILING_DPCPP) + void print_environment() const { auto device_id = ResourceEnvironment::sycl_device_id; - std::cerr << "Rank " << rank_ << ": SYCL device " - << gko::kernels::dpcpp::get_device_name(device_id) << " ID " - << device_id << std::endl; + std::stringstream ss; + ss << "Rank " << rank_ << ": SYCL device " + << gko::kernels::dpcpp::get_device_name(device_id) << " ID " + << device_id << std::endl; + std::cerr << ss.str(); } +#else + void print_environment() const {} +#endif private: int 
rank_; }; -#else - -class SyclEnvironment : public ::testing::Environment { -public: - explicit SyclEnvironment(int){}; -}; - -#endif - #endif // GKO_CORE_TEST_GTEST_ENVIRONMENTS_HPP_ diff --git a/core/test/gtest/ginkgo_main.cpp b/core/test/gtest/ginkgo_main.cpp index 01d1fc393c3..b8458dbc0b0 100644 --- a/core/test/gtest/ginkgo_main.cpp +++ b/core/test/gtest/ginkgo_main.cpp @@ -47,10 +47,7 @@ int main(int argc, char** argv) ::testing::InitGoogleTest(&argc, argv); ::testing::AddGlobalTestEnvironment(new ResourceEnvironment); - ::testing::AddGlobalTestEnvironment(new CudaEnvironment(0)); - ::testing::AddGlobalTestEnvironment(new HipEnvironment(0)); - ::testing::AddGlobalTestEnvironment(new SyclEnvironment(0)); - ::testing::AddGlobalTestEnvironment(new OmpEnvironment(0)); + ::testing::AddGlobalTestEnvironment(new DeviceEnvironment(0)); int result = RUN_ALL_TESTS(); return result; } diff --git a/core/test/gtest/ginkgo_mpi_main.cpp b/core/test/gtest/ginkgo_mpi_main.cpp index f7fe71981d2..c34d3c84693 100644 --- a/core/test/gtest/ginkgo_mpi_main.cpp +++ b/core/test/gtest/ginkgo_mpi_main.cpp @@ -46,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -394,10 +395,8 @@ int main(int argc, char** argv) testing::AddGlobalTestEnvironment(new GTestMPIListener::MPIEnvironment); ::testing::AddGlobalTestEnvironment(new ResourceEnvironment(rank, size)); - ::testing::AddGlobalTestEnvironment(new CudaEnvironment(rank)); - ::testing::AddGlobalTestEnvironment(new HipEnvironment(rank)); - ::testing::AddGlobalTestEnvironment(new SyclEnvironment(rank)); - ::testing::AddGlobalTestEnvironment(new OmpEnvironment(rank)); + ::testing::AddGlobalTestEnvironment(new DeviceEnvironment(rank)); + MPI_Barrier(comm); ::testing::TestEventListeners& listeners = ::testing::UnitTest::GetInstance()->listeners(); diff --git a/core/test/gtest/resources.cpp b/core/test/gtest/resources.cpp new file mode 100644 index 00000000000..2bd0cdc3496 --- /dev/null +++ b/core/test/gtest/resources.cpp @@ -0,0 +1,145 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/test/gtest/resources.hpp" + + +#include +#include +#include + + +#ifdef GKO_COMPILING_OMP +#include +#endif + + +#ifdef GKO_COMPILING_CUDA +#include "cuda/base/device.hpp" +#endif + + +#ifdef GKO_COMPILING_HIP +#include "hip/base/device.hpp" +#endif + + +#if GKO_COMPILING_DPCPP +#include "dpcpp/base/device.hpp" +#endif + + +#include +#include +#include + + +struct ctest_resource { + int id; + int slots; +}; + + +char* get_ctest_group(std::string resource_type, int group_id) +{ + std::transform(resource_type.begin(), resource_type.end(), + resource_type.begin(), + [](auto c) { return std::toupper(c); }); + std::string rs_group_env = "CTEST_RESOURCE_GROUP_" + + std::to_string(group_id) + "_" + resource_type; + return std::getenv(rs_group_env.c_str()); +} + + +ctest_resource parse_ctest_resources(std::string resource) +{ + std::regex re(R"(id\:(\d+),slots\:(\d+))"); + std::smatch match; + + if (!std::regex_match(resource, match, re)) { + GKO_INVALID_STATE("Can't parse ctest_resource string: " + resource); + } + + return ctest_resource{std::stoi(match[1]), std::stoi(match[2])}; +} + + +ResourceEnvironment::ResourceEnvironment(int rank, int size) +{ +#if GINKGO_BUILD_MPI + if (size > 1) { + cuda_device_id = gko::experimental::mpi::map_rank_to_device_id( + MPI_COMM_WORLD, std::max(gko::CudaExecutor::get_num_devices(), 1)); + hip_device_id = gko::experimental::mpi::map_rank_to_device_id( + MPI_COMM_WORLD, std::max(gko::HipExecutor::get_num_devices(), 1)); + sycl_device_id = gko::experimental::mpi::map_rank_to_device_id( + MPI_COMM_WORLD, + std::max(gko::DpcppExecutor::get_num_devices("gpu"), 1)); + } +#endif + + auto rs_count_env = std::getenv("CTEST_RESOURCE_GROUP_COUNT"); + auto rs_count = rs_count_env ? std::stoi(rs_count_env) : 0; + if (rs_count == 0) { + if (rank == 0) { + std::cerr << "Running without CTest ctest_resource configuration" + << std::endl; + } + return; + } + if (rs_count != size) { + GKO_INVALID_STATE("Invalid resource group count: " + + std::to_string(rs_count)); + } + + // parse CTest ctest_resource group descriptions + // OpenMP CPU threads + if (auto rs_omp_env = get_ctest_group("cpu", rank)) { + auto resource = parse_ctest_resources(rs_omp_env); + omp_threads = resource.slots; + } + // CUDA GPUs + if (auto rs_cuda_env = get_ctest_group("cudagpu", rank)) { + auto resource = parse_ctest_resources(rs_cuda_env); + cuda_device_id = resource.id; + } + // HIP GPUs + if (auto rs_hip_env = get_ctest_group("hipgpu", rank)) { + auto resource = parse_ctest_resources(rs_hip_env); + hip_device_id = resource.id; + } + // SYCL GPUs (no other devices!) 
+ if (auto rs_sycl_env = get_ctest_group("syclgpu", rank)) { + auto resource = parse_ctest_resources(rs_sycl_env); + sycl_device_id = resource.id; + } +} diff --git a/core/test/gtest/resources.hpp b/core/test/gtest/resources.hpp new file mode 100644 index 00000000000..a88280f29c7 --- /dev/null +++ b/core/test/gtest/resources.hpp @@ -0,0 +1,51 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_TEST_GTEST_RESOURCES_HPP_ +#define GKO_CORE_TEST_GTEST_RESOURCES_HPP_ + + +#include + + +class ResourceEnvironment : public ::testing::Environment { +public: + explicit ResourceEnvironment(int rank = 0, int size = 1); + + static int omp_threads; + static int cuda_device_id; + static int hip_device_id; + static int sycl_device_id; +}; + + +#endif // GKO_CORE_TEST_GTEST_RESOURCES_HPP_ diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp index 35f382806ec..6ef808aa1b3 100644 --- a/cuda/test/utils.hpp +++ b/cuda/test/utils.hpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/test/gtest/environments.hpp" +#include "core/test/gtest/resources.hpp" #include "cuda/base/device.hpp" diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp index d67c8935ab4..e1c9f9341fb 100644 --- a/hip/test/utils.hip.hpp +++ b/hip/test/utils.hip.hpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include "core/test/gtest/environments.hpp" +#include "core/test/gtest/resources.hpp" #include "hip/base/device.hpp" diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 2a8ace8e39a..e4ce56f3d7a 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -47,7 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include "core/test/gtest/environments.hpp" +#include "core/test/gtest/resources.hpp" #if GINKGO_COMMON_SINGLE_MODE From 1adef1712d566b3ca313c84a9728582d6e12edbe Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Thu, 31 Aug 2023 09:20:51 +0000 Subject: [PATCH 288/583] Format files Co-authored-by: Tobias Ribizel --- core/test/gtest/environments.hpp | 1 + core/test/gtest/ginkgo_mpi_main.cpp | 4 +++- core/test/gtest/resources.cpp | 6 +++--- omp/base/executor.cpp | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/core/test/gtest/environments.hpp b/core/test/gtest/environments.hpp index 89166a0594c..78c5a40f8a5 100644 --- a/core/test/gtest/environments.hpp +++ b/core/test/gtest/environments.hpp @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef GKO_CORE_TEST_GTEST_ENVIRONMENTS_HPP_ #define GKO_CORE_TEST_GTEST_ENVIRONMENTS_HPP_ + #include #include #include diff --git a/core/test/gtest/ginkgo_mpi_main.cpp b/core/test/gtest/ginkgo_mpi_main.cpp index c34d3c84693..12107ca55f8 100644 --- a/core/test/gtest/ginkgo_mpi_main.cpp +++ b/core/test/gtest/ginkgo_mpi_main.cpp @@ -45,10 +45,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include +#include + + #include diff --git a/core/test/gtest/resources.cpp b/core/test/gtest/resources.cpp index 2bd0cdc3496..dc8ad7931a9 100644 --- a/core/test/gtest/resources.cpp +++ b/core/test/gtest/resources.cpp @@ -30,14 +30,14 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/test/gtest/resources.hpp" - - #include #include #include +#include "core/test/gtest/resources.hpp" + + #ifdef GKO_COMPILING_OMP #include #endif diff --git a/omp/base/executor.cpp b/omp/base/executor.cpp index 3e14270ecdc..49fd1332ed5 100644 --- a/omp/base/executor.cpp +++ b/omp/base/executor.cpp @@ -49,4 +49,4 @@ int OmpExecutor::get_num_omp_threads() } -} // namespace gko \ No newline at end of file +} // namespace gko From c9fcd208f2cdf4e1cf5cc6ab7eb8cde19a2151c2 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 1 Sep 2023 16:47:38 +0200 Subject: [PATCH 289/583] link against GTest main statically --- core/test/gtest/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/test/gtest/CMakeLists.txt b/core/test/gtest/CMakeLists.txt index 43bb6863224..c4e9cb52870 100644 --- a/core/test/gtest/CMakeLists.txt +++ b/core/test/gtest/CMakeLists.txt @@ -1,10 +1,10 @@ function(add_gtest_main suffix definitions) - add_library(ginkgo_gtest_main${suffix} ginkgo_main.cpp resources.cpp) + add_library(ginkgo_gtest_main${suffix} STATIC ginkgo_main.cpp resources.cpp) target_link_libraries(ginkgo_gtest_main${suffix} PUBLIC Ginkgo::ginkgo GTest::GTest) target_compile_definitions(ginkgo_gtest_main${suffix} PRIVATE ${definitions}) ginkgo_compile_features(ginkgo_gtest_main${suffix}) if (GINKGO_BUILD_MPI) - add_library(ginkgo_gtest_main_mpi${suffix} ginkgo_mpi_main.cpp resources.cpp) + add_library(ginkgo_gtest_main_mpi${suffix} STATIC ginkgo_mpi_main.cpp resources.cpp) target_link_libraries(ginkgo_gtest_main_mpi${suffix} PUBLIC Ginkgo::ginkgo GTest::GTest MPI::MPI_CXX) target_compile_definitions(ginkgo_gtest_main_mpi${suffix} PRIVATE ${definitions}) ginkgo_compile_features(ginkgo_gtest_main_mpi${suffix}) From a430a918f92e7def6a488607a1dcb00967d4060f Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: 
Fri, 1 Sep 2023 16:54:33 +0200 Subject: [PATCH 290/583] use nla-gpu1 for ROCm tests as well --- .gitlab/image.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.gitlab/image.yml b/.gitlab/image.yml index 72fb51ad372..eb1ab5128af 100644 --- a/.gitlab/image.yml +++ b/.gitlab/image.yml @@ -72,15 +72,13 @@ image: ginkgohub/rocm:45-mvapich2-gnu8-llvm8 tags: - private_ci - - amdci - - gpu + - amd-gpu .use_gko-rocm502-nompi-gnu11-llvm11: image: ginkgohub/rocm:502-openmpi-gnu11-llvm11 tags: - private_ci - - amdci - - gpu + - amd-gpu .use_gko-oneapi-cpu: image: ginkgohub/oneapi:2022.1 From 1978788235e00f8b5591fb476b9048162d973fb3 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 7 Sep 2023 10:21:07 +0100 Subject: [PATCH 291/583] review updates - formatting - remove remaining occurrences of syclgpu - rename to GINKGO_CI_TEST_OMP_PARALLELISM Co-authored-by: Yuhsiang M. Tsai --- CMakeLists.txt | 2 +- cmake/create_test.cmake | 12 ++++++------ core/test/CMakeLists.txt | 1 - core/test/gtest/resources.cpp | 2 +- cuda/test/base/CMakeLists.txt | 2 +- omp/CMakeLists.txt | 2 +- 6 files changed, 10 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bec31a4360c..e07023bc46b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,7 +89,7 @@ option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) option(GINKGO_INSTALL_RPATH_DEPENDENCIES "Add dependencies to the installation RPATH." OFF) option(GINKGO_FORCE_GPU_AWARE_MPI "Assert that the MPI library is GPU aware. This forces Ginkgo to assume that GPU aware functionality is available (OFF (default) or ON), but may fail catastrophically in case the MPI implementation is not GPU Aware, and GPU aware functionality has been forced" OFF) -set(GINKGO_TEST_OMP_PARALLELISM "4" CACHE STRING +set(GINKGO_CI_TEST_OMP_PARALLELISM "4" CACHE STRING "The number of OpenMP threads to use for a test binary during CTest resource file-constrained test.") # load executor-specific configuration diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index baaf84f59eb..50271e12c9c 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -5,10 +5,10 @@ set(gko_test_option_args "NO_RESOURCES") ## Replaces / by _ to create valid target names from relative paths function(ginkgo_build_test_name test_name target_name) - file(RELATIVE_PATH REL_BINARY_DIR - ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") - set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE) + file(RELATIVE_PATH REL_BINARY_DIR + ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + string(REPLACE "/" "_" TEST_TARGET_NAME "${REL_BINARY_DIR}/${test_name}") + set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE) endfunction() ## Set up shared target properties and handle ADDITIONAL_LIBRARIES/ADDITIONAL_INCLUDES @@ -52,7 +52,7 @@ function(ginkgo_add_resource_requirement test_name) set(single_resource "cpu:1") elseif(add_rr_RESOURCE_TYPE STREQUAL "cpu") if(NOT add_rr_RESOURCE_LOCAL_CORES) - set(add_rr_RESOURCE_LOCAL_CORES ${GINKGO_TEST_OMP_PARALLELISM}) + set(add_rr_RESOURCE_LOCAL_CORES ${GINKGO_CI_TEST_OMP_PARALLELISM}) endif() if(NOT add_rr_RESOURCE_LOCAL_CORES MATCHES "^[0-9]+") message(FATAL_ERROR "Resource specification is invalid: RESOURCE_LOCAL_CORES=${add_rr_RESOURCE_LOCAL_CORES}") @@ -90,7 +90,7 @@ endfunction() ## Possible additional arguments: ## - `MPI_SIZE size` causes the tests to be run with `size` MPI processes. 
## - `RESOURCE_LOCAL_CORES` the number of threads used by a test, default is -## $GINKGO_TEST_OMP_PARALLELISM +## $GINKGO_CI_TEST_OMP_PARALLELISM ## - `RESOURCE_PERCENTAGE` usage percentage of a single GPU, default is 25 ## - `RESOURCE_TYPE` the resource type, can be ref, cpu, cudagpu, hipgpu, sycl ## - `DISABLE_EXECUTORS exec1 exec2` disables the test for certain backends (if built for multiple) diff --git a/core/test/CMakeLists.txt b/core/test/CMakeLists.txt index 776d0b72c7d..69f7ddd749e 100644 --- a/core/test/CMakeLists.txt +++ b/core/test/CMakeLists.txt @@ -1,7 +1,6 @@ include(${PROJECT_SOURCE_DIR}/cmake/create_test.cmake) add_subdirectory(gtest) - add_subdirectory(accessor) add_subdirectory(base) add_subdirectory(components) diff --git a/core/test/gtest/resources.cpp b/core/test/gtest/resources.cpp index dc8ad7931a9..0dd427b75ee 100644 --- a/core/test/gtest/resources.cpp +++ b/core/test/gtest/resources.cpp @@ -138,7 +138,7 @@ ResourceEnvironment::ResourceEnvironment(int rank, int size) hip_device_id = resource.id; } // SYCL GPUs (no other devices!) - if (auto rs_sycl_env = get_ctest_group("syclgpu", rank)) { + if (auto rs_sycl_env = get_ctest_group("sycl", rank)) { auto resource = parse_ctest_resources(rs_sycl_env); sycl_device_id = resource.id; } diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index bb99ba858a4..174f4533c52 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -4,7 +4,7 @@ ginkgo_create_cuda_test(index_set) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_cuda_test(cuda_executor_topology ADDITIONAL_LIBRARIES NUMA::NUMA) -endif () +endif() ginkgo_create_cuda_test(exception_helpers) ginkgo_create_cuda_test(kernel_launch) ginkgo_create_cuda_test(lin_op) diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 7f46feff5da..47259feeac0 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -5,7 +5,7 @@ target_sources(ginkgo_omp PRIVATE base/batch_multi_vector_kernels.cpp base/device_matrix_data_kernels.cpp - base/executor.cpp + base/executor.cpp base/index_set_kernels.cpp base/scoped_device_id.cpp base/version.cpp From 7b7b867be81e528938a638f2a2c2835defd7ee76 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 13 Sep 2023 10:38:15 +0200 Subject: [PATCH 292/583] use slots instead of percentages for GPUs --- cmake/create_test.cmake | 17 ++--------------- test/tools/resource_file_generator.cpp | 2 +- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 50271e12c9c..375135dcb13 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -1,4 +1,4 @@ -set(gko_test_resource_args "RESOURCE_LOCAL_CORES;RESOURCE_PERCENTAGE;RESOURCE_TYPE") +set(gko_test_resource_args "RESOURCE_LOCAL_CORES;RESOURCE_TYPE") set(gko_test_single_args "MPI_SIZE;${gko_test_resource_args}") set(gko_test_multi_args "DISABLE_EXECUTORS;ADDITIONAL_LIBRARIES;ADDITIONAL_INCLUDES") set(gko_test_option_args "NO_RESOURCES") @@ -60,19 +60,7 @@ function(ginkgo_add_resource_requirement test_name) set(single_resource "cpu:${add_rr_RESOURCE_LOCAL_CORES}") elseif(add_rr_RESOURCE_TYPE MATCHES "^(cudagpu|hipgpu|sycl)$") - if(NOT add_rr_RESOURCE_PERCENTAGE) - set(add_rr_RESOURCE_PERCENTAGE 25) - endif() - if(add_rr_MPI_SIZE GREATER 1) - set(add_rr_RESOURCE_PERCENTAGE 100) - endif() - if(NOT add_rr_RESOURCE_PERCENTAGE MATCHES "^[0-9]([0-9][0-9]?)?" 
- OR add_rr_RESOURCE_PERCENTAGE LESS 0 - OR add_rr_RESOURCE_PERCENTAGE GREATER 100) - message(FATAL_ERROR "Resource specification is invalid: RESOURCE_PERCENTAGE=${add_rr_RESOURCE_PERCENTAGE}") - endif() - - set(single_resource "${add_rr_RESOURCE_TYPE}:${add_rr_RESOURCE_PERCENTAGE}") + set(single_resource "${add_rr_RESOURCE_TYPE}:1") else() message(FATAL_ERROR "Unrecognized resource type ${add_rr_RESOURCE_TYPE}, allowed are: ref, cpu, cudagpu, hipgpu, sycl.") endif() @@ -91,7 +79,6 @@ endfunction() ## - `MPI_SIZE size` causes the tests to be run with `size` MPI processes. ## - `RESOURCE_LOCAL_CORES` the number of threads used by a test, default is ## $GINKGO_CI_TEST_OMP_PARALLELISM -## - `RESOURCE_PERCENTAGE` usage percentage of a single GPU, default is 25 ## - `RESOURCE_TYPE` the resource type, can be ref, cpu, cudagpu, hipgpu, sycl ## - `DISABLE_EXECUTORS exec1 exec2` disables the test for certain backends (if built for multiple) ## - `ADDITIONAL_LIBRARIES lib1 lib2` adds additional target link dependencies diff --git a/test/tools/resource_file_generator.cpp b/test/tools/resource_file_generator.cpp index f0dbbea0353..ca7b09288e8 100644 --- a/test/tools/resource_file_generator.cpp +++ b/test/tools/resource_file_generator.cpp @@ -92,7 +92,7 @@ int main() if (i > 0) { gpus.append(",\n"); } - gpus += R"( {"id": ")" + std::to_string(i) + R"(", "slots": 100})"; + gpus += R"( {"id": ")" + std::to_string(i) + R"(", "slots": 1})"; } if (num_devices) { gpus.append("\n]"); From 927b443d3a06b9bc7e914caee27370cb15c38a90 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 13 Sep 2023 11:01:37 +0200 Subject: [PATCH 293/583] use non-default stream only if necessary --- test/utils/executor.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index e4ce56f3d7a..ea2aef157fd 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -127,10 +127,10 @@ class CommonTestFixture : public ::testing::Test { CommonTestFixture() : -#ifdef GKO_COMPILING_CUDA +#if defined(GKO_TEST_NONDEFAULT_STREAM) && defined(GKO_COMPILING_CUDA) stream(ResourceEnvironment::cuda_device_id), #endif -#ifdef GKO_COMPILING_HIP +#if defined(GKO_TEST_NONDEFAULT_STREAM) && defined(GKO_COMPILING_HIP) stream(ResourceEnvironment::hip_device_id), #endif ref{gko::ReferenceExecutor::create()} From 4f84679127d94408001513a2df544595d2d5bd54 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 13 Sep 2023 11:01:50 +0200 Subject: [PATCH 294/583] fix CUDA trisolve regression --- cuda/solver/common_trs_kernels.cuh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh index f42b11f510d..6ee2c7521ff 100644 --- a/cuda/solver/common_trs_kernels.cuh +++ b/cuda/solver/common_trs_kernels.cuh @@ -120,12 +120,13 @@ struct CudaSolveStruct : gko::solver::SolveStruct { const auto rows = matrix->get_size()[0]; // workaround suggested by NVIDIA engineers: for some reason // cusparse needs non-nullptr input vectors even for analysis + // also make sure they are aligned by 16 bytes auto descr_b = cusparse::create_dnmat( dim<2>{matrix->get_size()[0], num_rhs}, matrix->get_size()[1], - reinterpret_cast(0xDEAD)); + reinterpret_cast(0xDEAD0)); auto descr_c = cusparse::create_dnmat( dim<2>{matrix->get_size()[0], num_rhs}, matrix->get_size()[1], - reinterpret_cast(0xDEAF)); + reinterpret_cast(0xDEAF0)); auto work_size = cusparse::spsm_buffer_size( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, From 
f0106bce310ff9a393a6e5561fd5d6b100fec01c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 14 Sep 2023 18:13:49 +0200 Subject: [PATCH 295/583] fix nondefault stream handling --- cuda/test/utils.hpp | 2 ++ hip/test/utils.hip.hpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp index 6ef808aa1b3..d13e364d66a 100644 --- a/cuda/test/utils.hpp +++ b/cuda/test/utils.hpp @@ -52,7 +52,9 @@ class CudaTestFixture : public ::testing::Test { protected: CudaTestFixture() : ref(gko::ReferenceExecutor::create()), +#ifdef GKO_TEST_NONDEFAULT_STREAM stream(ResourceEnvironment::cuda_device_id), +#endif exec(gko::CudaExecutor::create( ResourceEnvironment::cuda_device_id, ref, std::make_shared(), stream.get())), diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp index e1c9f9341fb..9fc3edc3f82 100644 --- a/hip/test/utils.hip.hpp +++ b/hip/test/utils.hip.hpp @@ -52,7 +52,9 @@ class HipTestFixture : public ::testing::Test { protected: HipTestFixture() : ref(gko::ReferenceExecutor::create()), +#ifdef GKO_TEST_NONDEFAULT_STREAM stream(ResourceEnvironment::hip_device_id), +#endif exec(gko::HipExecutor::create(ResourceEnvironment::hip_device_id, ref, std::make_shared(), stream.get())), From 22be0ce76aad55c9374d53da2af766077ec53dd2 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 14 Sep 2023 18:14:24 +0200 Subject: [PATCH 296/583] remove `tests` target --- CMakeLists.txt | 4 ---- cmake/create_test.cmake | 2 -- 2 files changed, 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e07023bc46b..8ac16267717 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -309,10 +309,6 @@ configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in # propagated to the other parts of Ginkgo in case of building as static libraries add_subdirectory(devices) # Basic device functionalities. Always compiled. 
add_subdirectory(common) # Import list of unified kernel source files -if(GINKGO_BUILD_TESTS) - # use custom target `tests` to build only test binaries - add_custom_target(tests) -endif() if(GINKGO_BUILD_CUDA) add_subdirectory(cuda) # High-performance kernels for NVIDIA GPUs endif() diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 375135dcb13..4dd6bd12125 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -100,8 +100,6 @@ function(ginkgo_add_test test_name test_target_name) COMMAND ${test_target_name} WORKING_DIRECTORY "$") endif() - # use custom target `tests` to build only test binaries - add_dependencies(tests ${test_target_name}) ginkgo_add_resource_requirement(${REL_BINARY_DIR}/${test_name} ${ARGN}) From 556f6387b50684c63c15541a41766e37db804e1e Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 14 Sep 2023 18:15:19 +0200 Subject: [PATCH 297/583] remove `ref` resource type This can be handled by the `-j` flag --- cmake/create_test.cmake | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 4dd6bd12125..e66bfb3178c 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -40,17 +40,11 @@ endfunction() function(ginkgo_add_resource_requirement test_name) cmake_parse_arguments(PARSE_ARGV 1 add_rr "${gko_test_option_args}" "${gko_test_single_args}" "") - if(add_rr_NO_RESOURCES) + if(add_rr_NO_RESOURCES OR (NOT add_rr_RESOURCE_TYPE)) return() - endif() - - if (NOT add_rr_RESOURCE_TYPE) - message(FATAL_ERROR "Need to provide resource type used by test.") endif () - if(add_rr_RESOURCE_TYPE STREQUAL "ref") - set(single_resource "cpu:1") - elseif(add_rr_RESOURCE_TYPE STREQUAL "cpu") + if(add_rr_RESOURCE_TYPE STREQUAL "cpu") if(NOT add_rr_RESOURCE_LOCAL_CORES) set(add_rr_RESOURCE_LOCAL_CORES ${GINKGO_CI_TEST_OMP_PARALLELISM}) endif() @@ -79,7 +73,7 @@ endfunction() ## - `MPI_SIZE size` causes the tests to be run with `size` MPI processes. 
## - `RESOURCE_LOCAL_CORES` the number of threads used by a test, default is ## $GINKGO_CI_TEST_OMP_PARALLELISM -## - `RESOURCE_TYPE` the resource type, can be ref, cpu, cudagpu, hipgpu, sycl +## - `RESOURCE_TYPE` the resource type, can be cpu, cudagpu, hipgpu, sycl ## - `DISABLE_EXECUTORS exec1 exec2` disables the test for certain backends (if built for multiple) ## - `ADDITIONAL_LIBRARIES lib1 lib2` adds additional target link dependencies ## - `ADDITIONAL_INCLUDES path1 path2` adds additional target include paths @@ -121,7 +115,7 @@ function(ginkgo_create_test test_name) add_executable(${test_target_name} ${test_name}.cpp) target_link_libraries(${test_target_name}) ginkgo_set_test_target_properties(${test_target_name} "" ${ARGN}) - ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE ref) + ginkgo_add_test(${test_name} ${test_target_name} ${ARGN}) endfunction(ginkgo_create_test) ## Test compiled with dpcpp @@ -263,7 +257,7 @@ function(ginkgo_create_common_test_internal test_name exec_type exec) return() endif() if (exec STREQUAL reference) - set(test_resource_type ref) + set(test_resource_type "") elseif (exec STREQUAL omp) set(test_resource_type cpu) elseif (exec STREQUAL cuda) From dc0c474fe0f0c336a63e209210f0a1fa794f3371 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 14 Sep 2023 18:15:55 +0200 Subject: [PATCH 298/583] move more tests to host compiler --- cuda/test/base/CMakeLists.txt | 10 +++++----- cuda/test/base/{array.cu => array.cpp} | 0 cuda/test/base/{index_set.cu => index_set.cpp} | 0 cuda/test/base/{lin_op.cu => lin_op.cpp} | 0 cuda/test/base/{memory.cu => memory.cpp} | 0 hip/test/base/CMakeLists.txt | 8 ++++---- hip/test/base/{lin_op.hip.cpp => lin_op.cpp} | 0 hip/test/base/{memory.hip.cpp => memory.cpp} | 0 hip/test/matrix/CMakeLists.txt | 2 +- .../{fbcsr_kernels.hip.cpp => fbcsr_kernels.cpp} | 0 hip/test/solver/CMakeLists.txt | 4 ++-- hip/test/utils/CMakeLists.txt | 2 +- .../{assertions_test.hip.cpp => assertions_test.cpp} | 0 13 files changed, 13 insertions(+), 13 deletions(-) rename cuda/test/base/{array.cu => array.cpp} (100%) rename cuda/test/base/{index_set.cu => index_set.cpp} (100%) rename cuda/test/base/{lin_op.cu => lin_op.cpp} (100%) rename cuda/test/base/{memory.cu => memory.cpp} (100%) rename hip/test/base/{lin_op.hip.cpp => lin_op.cpp} (100%) rename hip/test/base/{memory.hip.cpp => memory.cpp} (100%) rename hip/test/matrix/{fbcsr_kernels.hip.cpp => fbcsr_kernels.cpp} (100%) rename hip/test/utils/{assertions_test.hip.cpp => assertions_test.cpp} (100%) diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index 174f4533c52..d4260c6e934 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,13 +1,13 @@ -ginkgo_create_cuda_test(array) +ginkgo_create_test(array RESOURCE_TYPE cudagpu) ginkgo_create_cuda_test(cuda_executor) -ginkgo_create_cuda_test(index_set) +ginkgo_create_test(index_set RESOURCE_TYPE cudagpu) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_cuda_test(cuda_executor_topology ADDITIONAL_LIBRARIES NUMA::NUMA) endif() ginkgo_create_cuda_test(exception_helpers) ginkgo_create_cuda_test(kernel_launch) -ginkgo_create_cuda_test(lin_op) +ginkgo_create_test(lin_op RESOURCE_TYPE cudagpu) ginkgo_create_cuda_test(math) -ginkgo_create_cuda_test(memory) -ginkgo_create_cuda_test(scoped_device_id NO_RESOURCES) +ginkgo_create_test(memory RESOURCE_TYPE cudagpu) +ginkgo_create_cuda_test(scoped_device_id) diff --git a/cuda/test/base/array.cu b/cuda/test/base/array.cpp similarity 
index 100% rename from cuda/test/base/array.cu rename to cuda/test/base/array.cpp diff --git a/cuda/test/base/index_set.cu b/cuda/test/base/index_set.cpp similarity index 100% rename from cuda/test/base/index_set.cu rename to cuda/test/base/index_set.cpp diff --git a/cuda/test/base/lin_op.cu b/cuda/test/base/lin_op.cpp similarity index 100% rename from cuda/test/base/lin_op.cu rename to cuda/test/base/lin_op.cpp diff --git a/cuda/test/base/memory.cu b/cuda/test/base/memory.cpp similarity index 100% rename from cuda/test/base/memory.cu rename to cuda/test/base/memory.cpp diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index ed32ab5b6a7..bfe8c8be96a 100644 --- a/hip/test/base/CMakeLists.txt +++ b/hip/test/base/CMakeLists.txt @@ -1,18 +1,18 @@ ginkgo_create_hip_test(hip_executor) -ginkgo_create_test(index_set) +ginkgo_create_test(index_set RESOURCE_TYPE hipgpu) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_hip_test(hip_executor_topology ADDITIONAL_LIBRARIES NUMA::NUMA) endif() ginkgo_create_hip_test(kernel_launch) # correct flags for kernel_launch.hpp are set in GINKGO_HIPCC_OPTIONS -ginkgo_create_hip_test(lin_op) +ginkgo_create_test(lin_op RESOURCE_TYPE hipgpu) ginkgo_create_hip_test(math) -ginkgo_create_hip_test(memory) +ginkgo_create_test(memory RESOURCE_TYPE hipgpu) # Only hcc needs the libraries. nvcc only requires the headers. if (GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}") ginkgo_create_hip_test(exception_helpers ADDITIONAL_LIBRARIES roc::hipblas roc::hipsparse hip::hiprand roc::rocrand) else() ginkgo_create_hip_test(exception_helpers) endif() -ginkgo_create_hip_test(scoped_device_id NO_RESOURCES) +ginkgo_create_hip_test(scoped_device_id) diff --git a/hip/test/base/lin_op.hip.cpp b/hip/test/base/lin_op.cpp similarity index 100% rename from hip/test/base/lin_op.hip.cpp rename to hip/test/base/lin_op.cpp diff --git a/hip/test/base/memory.hip.cpp b/hip/test/base/memory.cpp similarity index 100% rename from hip/test/base/memory.hip.cpp rename to hip/test/base/memory.cpp diff --git a/hip/test/matrix/CMakeLists.txt b/hip/test/matrix/CMakeLists.txt index 82db4b8b376..a52069daea0 100644 --- a/hip/test/matrix/CMakeLists.txt +++ b/hip/test/matrix/CMakeLists.txt @@ -1,4 +1,4 @@ -ginkgo_create_hip_test(fbcsr_kernels) +ginkgo_create_test(fbcsr_kernels RESOURCE_TYPE hipgpu) if (hipfft_FOUND) ginkgo_create_hip_test(fft_kernels) endif() diff --git a/hip/test/matrix/fbcsr_kernels.hip.cpp b/hip/test/matrix/fbcsr_kernels.cpp similarity index 100% rename from hip/test/matrix/fbcsr_kernels.hip.cpp rename to hip/test/matrix/fbcsr_kernels.cpp diff --git a/hip/test/solver/CMakeLists.txt b/hip/test/solver/CMakeLists.txt index a3b86589410..fcbb3de0c47 100644 --- a/hip/test/solver/CMakeLists.txt +++ b/hip/test/solver/CMakeLists.txt @@ -1,2 +1,2 @@ -ginkgo_create_test(lower_trs_kernels) -ginkgo_create_test(upper_trs_kernels) +ginkgo_create_test(lower_trs_kernels RESOURCE_TYPE hipgpu) +ginkgo_create_test(upper_trs_kernels RESOURCE_TYPE hipgpu) diff --git a/hip/test/utils/CMakeLists.txt b/hip/test/utils/CMakeLists.txt index a6c52f65d9c..d9ec2ff29a7 100644 --- a/hip/test/utils/CMakeLists.txt +++ b/hip/test/utils/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_hip_test(assertions_test) +ginkgo_create_test(assertions_test RESOURCE_TYPE hipgpu) diff --git a/hip/test/utils/assertions_test.hip.cpp b/hip/test/utils/assertions_test.cpp similarity index 100% rename from hip/test/utils/assertions_test.hip.cpp rename to hip/test/utils/assertions_test.cpp From 
346d6230a5e2e305c8cfea4b6c51a2b7f07b460c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 15 Sep 2023 18:03:07 +0200 Subject: [PATCH 299/583] Update cmake/create_test.cmake Co-authored-by: Yu-Hsiang M. Tsai <19565938+yhmtsai@users.noreply.github.com> --- cmake/create_test.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index e66bfb3178c..1911276f61d 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -56,7 +56,7 @@ function(ginkgo_add_resource_requirement test_name) elseif(add_rr_RESOURCE_TYPE MATCHES "^(cudagpu|hipgpu|sycl)$") set(single_resource "${add_rr_RESOURCE_TYPE}:1") else() - message(FATAL_ERROR "Unrecognized resource type ${add_rr_RESOURCE_TYPE}, allowed are: ref, cpu, cudagpu, hipgpu, sycl.") + message(FATAL_ERROR "Unrecognized resource type ${add_rr_RESOURCE_TYPE}, allowed are: cpu, cudagpu, hipgpu, sycl.") endif() if(NOT add_rr_MPI_SIZE) From 78485c2bd085dbb680c3f907bf18bdd81423e847 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 21 Sep 2023 11:34:23 +0200 Subject: [PATCH 300/583] review updates - make more tests host-compiled - make GTest main library suffix more descriptive - more consistent formatting --- cmake/create_test.cmake | 2 +- core/test/gtest/CMakeLists.txt | 2 ++ core/test/gtest/ginkgo_mpi_main.cpp | 2 +- cuda/test/reorder/CMakeLists.txt | 2 +- cuda/test/reorder/{rcm_kernels.cu => rcm_kernels.cpp} | 0 omp/test/reorder/CMakeLists.txt | 2 +- 6 files changed, 6 insertions(+), 4 deletions(-) rename cuda/test/reorder/{rcm_kernels.cu => rcm_kernels.cpp} (100%) diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 1911276f61d..cec47fced74 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -114,7 +114,7 @@ function(ginkgo_create_test test_name) ginkgo_build_test_name(${test_name} test_target_name) add_executable(${test_target_name} ${test_name}.cpp) target_link_libraries(${test_target_name}) - ginkgo_set_test_target_properties(${test_target_name} "" ${ARGN}) + ginkgo_set_test_target_properties(${test_target_name} "_cpu" ${ARGN}) ginkgo_add_test(${test_name} ${test_target_name} ${ARGN}) endfunction(ginkgo_create_test) diff --git a/core/test/gtest/CMakeLists.txt b/core/test/gtest/CMakeLists.txt index c4e9cb52870..6d77b663e84 100644 --- a/core/test/gtest/CMakeLists.txt +++ b/core/test/gtest/CMakeLists.txt @@ -13,8 +13,10 @@ endfunction() add_gtest_main("" "") add_library(ginkgo_gtest_main_reference ALIAS ginkgo_gtest_main) +add_library(ginkgo_gtest_main_cpu ALIAS ginkgo_gtest_main) if (GINKGO_BUILD_MPI) add_library(ginkgo_gtest_main_mpi_reference ALIAS ginkgo_gtest_main_mpi) + add_library(ginkgo_gtest_main_mpi_cpu ALIAS ginkgo_gtest_main_mpi) endif() if (GINKGO_BUILD_OMP) add_gtest_main("_omp" "GKO_COMPILING_OMP") diff --git a/core/test/gtest/ginkgo_mpi_main.cpp b/core/test/gtest/ginkgo_mpi_main.cpp index 12107ca55f8..6853a12c940 100644 --- a/core/test/gtest/ginkgo_mpi_main.cpp +++ b/core/test/gtest/ginkgo_mpi_main.cpp @@ -395,7 +395,7 @@ int main(int argc, char** argv) MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); - testing::AddGlobalTestEnvironment(new GTestMPIListener::MPIEnvironment); + ::testing::AddGlobalTestEnvironment(new GTestMPIListener::MPIEnvironment); ::testing::AddGlobalTestEnvironment(new ResourceEnvironment(rank, size)); ::testing::AddGlobalTestEnvironment(new DeviceEnvironment(rank)); MPI_Barrier(comm); diff --git a/cuda/test/reorder/CMakeLists.txt b/cuda/test/reorder/CMakeLists.txt index 
e6cd8c0f5d2..79deba957b3 100644 --- a/cuda/test/reorder/CMakeLists.txt +++ b/cuda/test/reorder/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_cuda_test(rcm_kernels) +ginkgo_create_test(rcm_kernels RESOURCE_TYPE cudagpu) diff --git a/cuda/test/reorder/rcm_kernels.cu b/cuda/test/reorder/rcm_kernels.cpp similarity index 100% rename from cuda/test/reorder/rcm_kernels.cu rename to cuda/test/reorder/rcm_kernels.cpp diff --git a/omp/test/reorder/CMakeLists.txt b/omp/test/reorder/CMakeLists.txt index 089e51c67c9..65aea4a0fdb 100644 --- a/omp/test/reorder/CMakeLists.txt +++ b/omp/test/reorder/CMakeLists.txt @@ -1 +1 @@ -ginkgo_create_omp_test(rcm_kernels) +ginkgo_create_test(rcm_kernels RESOURCE_TYPE cpu) From 963a19752cbe5f209b084557ab43f18d81465041 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 23 Sep 2023 17:34:53 +0200 Subject: [PATCH 301/583] fix unused-argument warning for unsafe-atomic flag --- cmake/hip.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/hip.cmake b/cmake/hip.cmake index e1897b42c9c..72a7a3a86d8 100644 --- a/cmake/hip.cmake +++ b/cmake/hip.cmake @@ -207,7 +207,7 @@ set(GINKGO_HIPCC_OPTIONS ${GINKGO_HIP_COMPILER_FLAGS} "-std=c++14 -DGKO_COMPILIN set(GINKGO_HIP_NVCC_OPTIONS ${GINKGO_HIP_NVCC_COMPILER_FLAGS} ${GINKGO_HIP_NVCC_ARCH} ${GINKGO_HIP_NVCC_ADDITIONAL_FLAGS}) set(GINKGO_HIP_CLANG_OPTIONS ${GINKGO_HIP_CLANG_COMPILER_FLAGS} ${GINKGO_AMD_ARCH_FLAGS}) if(GINKGO_HIP_AMD_UNSAFE_ATOMIC AND HIP_VERSION VERSION_GREATER_EQUAL 5) - list(APPEND GINKGO_HIP_CLANG_OPTIONS -munsafe-fp-atomics) + list(APPEND GINKGO_HIP_CLANG_OPTIONS "-munsafe-fp-atomics -Wno-unused-command-line-argument") endif() # HIP's cmake support secretly carries around global state to remember # whether we created any shared libraries, and sets PIC flags accordingly. From 985a836153949957580d9a9ffdbfec8779fe0c47 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 24 Sep 2023 21:53:59 +0200 Subject: [PATCH 302/583] use non-deprecated headers for HIP libs --- benchmark/utils/hip_linops.hip.cpp | 3 --- hip/base/exception.hip.cpp | 6 ++++++ hip/base/hipblas_bindings.hip.hpp | 5 +++++ hip/base/hiprand_bindings.hip.hpp | 5 +++++ hip/base/hipsparse_bindings.hip.hpp | 5 +++++ hip/base/hipsparse_block_bindings.hip.hpp | 5 +++++ hip/base/pointer_mode_guard.hip.hpp | 5 +++++ hip/base/roctx.hip.cpp | 4 ++++ hip/base/types.hip.hpp | 4 ++++ hip/matrix/fft_kernels.hip.cpp | 5 +++++ hip/solver/common_trs_kernels.hip.hpp | 4 ++++ hip/solver/lower_trs_kernels.hip.cpp | 4 ++++ hip/solver/upper_trs_kernels.hip.cpp | 4 ++++ hip/test/base/exception_helpers.hip.cpp | 6 ++++++ hip/test/matrix/fft_kernels.hip.cpp | 5 +++++ 15 files changed, 67 insertions(+), 3 deletions(-) diff --git a/benchmark/utils/hip_linops.hip.cpp b/benchmark/utils/hip_linops.hip.cpp index 627dfad980e..c8664778e02 100644 --- a/benchmark/utils/hip_linops.hip.cpp +++ b/benchmark/utils/hip_linops.hip.cpp @@ -36,9 +36,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include - - #include "benchmark/utils/sparselib_linops.hpp" #include "benchmark/utils/types.hpp" #include "hip/base/hipsparse_bindings.hip.hpp" diff --git a/hip/base/exception.hip.cpp b/hip/base/exception.hip.cpp index 19a2b3739ac..7a182963f74 100644 --- a/hip/base/exception.hip.cpp +++ b/hip/base/exception.hip.cpp @@ -37,9 +37,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#if HIP_VERSION >= 50200000 +#include +#include +#include +#else #include #include #include +#endif #include diff --git a/hip/base/hipblas_bindings.hip.hpp b/hip/base/hipblas_bindings.hip.hpp index 2ff73c81e34..63751aa725a 100644 --- a/hip/base/hipblas_bindings.hip.hpp +++ b/hip/base/hipblas_bindings.hip.hpp @@ -34,7 +34,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_HIP_BASE_HIPBLAS_BINDINGS_HIP_HPP_ +#include +#if HIP_VERSION >= 50200000 +#include +#else #include +#endif #include diff --git a/hip/base/hiprand_bindings.hip.hpp b/hip/base/hiprand_bindings.hip.hpp index 900433af339..14e144f6d84 100644 --- a/hip/base/hiprand_bindings.hip.hpp +++ b/hip/base/hiprand_bindings.hip.hpp @@ -34,7 +34,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_HIP_BASE_HIPRAND_BINDINGS_HIP_HPP_ +#include +#if HIP_VERSION >= 50200000 +#include +#else #include +#endif #include diff --git a/hip/base/hipsparse_bindings.hip.hpp b/hip/base/hipsparse_bindings.hip.hpp index 90378d3c711..322467dc2b3 100644 --- a/hip/base/hipsparse_bindings.hip.hpp +++ b/hip/base/hipsparse_bindings.hip.hpp @@ -34,7 +34,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_HIP_BASE_HIPSPARSE_BINDINGS_HIP_HPP_ +#include +#if HIP_VERSION >= 50200000 +#include +#else #include +#endif #include diff --git a/hip/base/hipsparse_block_bindings.hip.hpp b/hip/base/hipsparse_block_bindings.hip.hpp index bc6c28394eb..49ef1e86c7d 100644 --- a/hip/base/hipsparse_block_bindings.hip.hpp +++ b/hip/base/hipsparse_block_bindings.hip.hpp @@ -34,7 +34,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_HIP_BASE_HIPSPARSE_BLOCK_BINDINGS_HIP_HPP_ +#include +#if HIP_VERSION >= 50200000 +#include +#else #include +#endif #include diff --git a/hip/base/pointer_mode_guard.hip.hpp b/hip/base/pointer_mode_guard.hip.hpp index 681839ec9e2..11fa5afeb9e 100644 --- a/hip/base/pointer_mode_guard.hip.hpp +++ b/hip/base/pointer_mode_guard.hip.hpp @@ -38,8 +38,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#if HIP_VERSION >= 50200000 +#include +#include +#else #include #include +#endif #include diff --git a/hip/base/roctx.hip.cpp b/hip/base/roctx.hip.cpp index 23b07e60254..6e2d93b3a06 100644 --- a/hip/base/roctx.hip.cpp +++ b/hip/base/roctx.hip.cpp @@ -37,8 +37,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX +#if HIP_VERSION >= 50200000 +#include +#else #include #endif +#endif #include diff --git a/hip/base/types.hip.hpp b/hip/base/types.hip.hpp index 93ae3646a4c..c886378ec80 100644 --- a/hip/base/types.hip.hpp +++ b/hip/base/types.hip.hpp @@ -43,7 +43,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#if HIP_VERSION >= 50200000 +#include +#else #include +#endif #include diff --git a/hip/matrix/fft_kernels.hip.cpp b/hip/matrix/fft_kernels.hip.cpp index 238aeddc40f..56c967d9e49 100644 --- a/hip/matrix/fft_kernels.hip.cpp +++ b/hip/matrix/fft_kernels.hip.cpp @@ -36,7 +36,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include +#if HIP_VERSION >= 50200000 +#include +#else #include +#endif #include diff --git a/hip/solver/common_trs_kernels.hip.hpp b/hip/solver/common_trs_kernels.hip.hpp index 643c875561e..6cf2ca516f2 100644 --- a/hip/solver/common_trs_kernels.hip.hpp +++ b/hip/solver/common_trs_kernels.hip.hpp @@ -39,7 +39,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#if HIP_VERSION >= 50200000 +#include +#else #include +#endif #include diff --git a/hip/solver/lower_trs_kernels.hip.cpp b/hip/solver/lower_trs_kernels.hip.cpp index 2e9dd0d0ce3..283f5ee5284 100644 --- a/hip/solver/lower_trs_kernels.hip.cpp +++ b/hip/solver/lower_trs_kernels.hip.cpp @@ -37,7 +37,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#if HIP_VERSION >= 50200000 +#include +#else #include +#endif #include diff --git a/hip/solver/upper_trs_kernels.hip.cpp b/hip/solver/upper_trs_kernels.hip.cpp index a3c6070614c..09e71826130 100644 --- a/hip/solver/upper_trs_kernels.hip.cpp +++ b/hip/solver/upper_trs_kernels.hip.cpp @@ -37,7 +37,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#if HIP_VERSION >= 50200000 +#include +#else #include +#endif #include diff --git a/hip/test/base/exception_helpers.hip.cpp b/hip/test/base/exception_helpers.hip.cpp index 29dea03961f..7014738bd76 100644 --- a/hip/test/base/exception_helpers.hip.cpp +++ b/hip/test/base/exception_helpers.hip.cpp @@ -34,9 +34,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#if HIP_VERSION >= 50200000 +#include +#include +#include +#else #include #include #include +#endif #include diff --git a/hip/test/matrix/fft_kernels.hip.cpp b/hip/test/matrix/fft_kernels.hip.cpp index 59c24492b5b..8c213df8ad5 100644 --- a/hip/test/matrix/fft_kernels.hip.cpp +++ b/hip/test/matrix/fft_kernels.hip.cpp @@ -33,7 +33,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include +#if HIP_VERSION >= 50200000 +#include +#else #include +#endif #include From 089c46f577717add5e121f5fd9ff4815204e72c7 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 28 Sep 2023 11:24:15 +0200 Subject: [PATCH 303/583] fix HIP Jacobi transposition compilation --- .../preconditioner/jacobi_kernels.hpp.inc | 59 +++++++++---------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc b/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc index 8827a47620b..c63a644f87b 100644 --- a/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc +++ b/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc @@ -145,8 +145,8 @@ __global__ void agglomerate_supervariables_kernel( } -template +template __global__ void __launch_bounds__(warps_per_block* config::warp_size) transpose_jacobi(const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme @@ -154,8 +154,7 @@ __global__ void __launch_bounds__(warps_per_block* config::warp_size) const IndexType* __restrict__ block_ptrs, size_type num_blocks, ValueType* __restrict__ out_blocks) { - const auto block_id = - thread::get_subwarp_id(); + const auto block_id = thread::get_subwarp_id_flat(); const auto subwarp = group::tiled_partition(group::this_thread_block()); if (block_id >= num_blocks) { @@ -176,18 +175,16 @@ __global__ void __launch_bounds__(warps_per_block* config::warp_size) } -template -__global__ void -__launch_bounds__(warps_per_block* config::warp_size) adaptive_transpose_jacobi( +template +__global__ void adaptive_transpose_jacobi( const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, const precision_reduction* __restrict__ block_precisions, const IndexType* __restrict__ block_ptrs, size_type num_blocks, ValueType* __restrict__ out_blocks) { - const auto block_id = - thread::get_subwarp_id(); + const auto block_id = thread::get_subwarp_id_flat(); const auto subwarp = group::tiled_partition(group::this_thread_block()); if (block_id >= num_blocks) { @@ -197,23 +194,23 @@ __launch_bounds__(warps_per_block* config::warp_size) adaptive_transpose_jacobi( const auto block_stride = storage_scheme.get_stride(); const auto rank = subwarp.thread_rank(); - if (rank < block_size) { - GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( - ValueType, block_precisions[block_id], - auto local_block = - reinterpret_cast( - blocks + storage_scheme.get_group_offset(block_id)) + - storage_scheme.get_block_offset(block_id); - auto local_out_block = - reinterpret_cast( - out_blocks + storage_scheme.get_group_offset(block_id)) + - storage_scheme.get_block_offset(block_id); - for (IndexType i = 0; i < block_size; ++i) { - auto val = local_block[i * block_stride + rank]; - local_out_block[i + rank * block_stride] = - conjugate ? conj(val) : val; - }); - } + GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( + ValueType, block_precisions[block_id], + auto local_block = + reinterpret_cast( + blocks + storage_scheme.get_group_offset(block_id)) + + storage_scheme.get_block_offset(block_id); + auto local_out_block = + reinterpret_cast( + out_blocks + storage_scheme.get_group_offset(block_id)) + + storage_scheme.get_block_offset(block_id); + for (int i = rank; i < block_size * block_size; i += subwarp_size) { + int row = i % block_size; + int col = i / block_size; + auto val = local_block[row + col * block_stride]; + local_out_block[row * block_stride + col] = + conjugate ? 
conj(val) : val; + }); } @@ -313,18 +310,16 @@ void transpose_jacobi( constexpr int blocks_per_warp = config::warp_size / subwarp_size; const auto grid_size = ceildiv(num_blocks, warps_per_block * blocks_per_warp); - const dim3 block_size(subwarp_size, blocks_per_warp, warps_per_block); + const auto block_size = subwarp_size * blocks_per_warp * warps_per_block; if (grid_size > 0) { if (block_precisions) { - adaptive_transpose_jacobi + adaptive_transpose_jacobi <<get_stream()>>>( as_device_type(blocks), storage_scheme, block_precisions, block_pointers, num_blocks, as_device_type(out_blocks)); } else { - transpose_jacobi + transpose_jacobi <<get_stream()>>>( as_device_type(blocks), storage_scheme, block_pointers, num_blocks, as_device_type(out_blocks)); From 2afdc4525ff0d0b998f51edbbf4a1738a29fc3db Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 28 Sep 2023 11:35:35 +0200 Subject: [PATCH 304/583] avoid changing the thread indexing scheme --- .../preconditioner/jacobi_kernels.hpp.inc | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc b/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc index c63a644f87b..2a0f7bd0dd7 100644 --- a/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc +++ b/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc @@ -145,8 +145,8 @@ __global__ void agglomerate_supervariables_kernel( } -template +template __global__ void __launch_bounds__(warps_per_block* config::warp_size) transpose_jacobi(const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme @@ -154,7 +154,8 @@ __global__ void __launch_bounds__(warps_per_block* config::warp_size) const IndexType* __restrict__ block_ptrs, size_type num_blocks, ValueType* __restrict__ out_blocks) { - const auto block_id = thread::get_subwarp_id_flat(); + const auto block_id = + thread::get_subwarp_id(); const auto subwarp = group::tiled_partition(group::this_thread_block()); if (block_id >= num_blocks) { @@ -175,16 +176,18 @@ __global__ void __launch_bounds__(warps_per_block* config::warp_size) } -template -__global__ void adaptive_transpose_jacobi( +template +__global__ void +__launch_bounds__(warps_per_block* config::warp_size) adaptive_transpose_jacobi( const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, const precision_reduction* __restrict__ block_precisions, const IndexType* __restrict__ block_ptrs, size_type num_blocks, ValueType* __restrict__ out_blocks) { - const auto block_id = thread::get_subwarp_id_flat(); + const auto block_id = + thread::get_subwarp_id(); const auto subwarp = group::tiled_partition(group::this_thread_block()); if (block_id >= num_blocks) { @@ -310,16 +313,18 @@ void transpose_jacobi( constexpr int blocks_per_warp = config::warp_size / subwarp_size; const auto grid_size = ceildiv(num_blocks, warps_per_block * blocks_per_warp); - const auto block_size = subwarp_size * blocks_per_warp * warps_per_block; + const dim3 block_size(subwarp_size, blocks_per_warp, warps_per_block); if (grid_size > 0) { if (block_precisions) { - adaptive_transpose_jacobi + adaptive_transpose_jacobi <<get_stream()>>>( as_device_type(blocks), storage_scheme, block_precisions, block_pointers, num_blocks, as_device_type(out_blocks)); } else { - transpose_jacobi + transpose_jacobi <<get_stream()>>>( as_device_type(blocks), storage_scheme, block_pointers, num_blocks, as_device_type(out_blocks)); From 6698a995327f508e853d22b31cbe2df195d7e781 Mon Sep 17 
00:00:00 2001 From: Tobias Ribizel Date: Thu, 28 Sep 2023 16:34:44 +0200 Subject: [PATCH 305/583] fix SLURM GPU allocation --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 94dedd030c6..6185608864f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -295,7 +295,7 @@ test/cuda110/nompi/clang/cuda/release/static: variables: USE_NAME: "cuda110-nompi-clang-${CI_PIPELINE_ID}" SLURM_PARTITION: "accelerated" - SLURM_GRES: "gpu:1" + SLURM_GRES: "gpu:4" SLURM_TIME: "01:30:00" dependencies: null needs: [ "build/cuda110/nompi/clang/cuda/release/static" ] @@ -329,7 +329,7 @@ test/cuda110/nompi/intel/cuda/debug/static: variables: USE_NAME: "cuda110-nompi-intel-${CI_PIPELINE_ID}" SLURM_PARTITION: "accelerated" - SLURM_GRES: "gpu:1" + SLURM_GRES: "gpu:4" SLURM_TIME: "02:00:00" dependencies: null needs: [ "build/cuda110/nompi/intel/cuda/debug/static" ] From d610c526b09b7f71b2781cfc3f00bf657cc02589 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Wed, 4 Oct 2023 13:06:56 +0200 Subject: [PATCH 306/583] fix ell accessor type --- common/cuda_hip/matrix/ell_kernels.hpp.inc | 24 +++++++------ cuda/matrix/ell_kernels.cu | 6 ++-- dpcpp/matrix/ell_kernels.dp.cpp | 40 +++++++++++++--------- hip/matrix/ell_kernels.hip.cpp | 6 ++-- 4 files changed, 45 insertions(+), 31 deletions(-) diff --git a/common/cuda_hip/matrix/ell_kernels.hpp.inc b/common/cuda_hip/matrix/ell_kernels.hpp.inc index 6c81fb4964c..e7bcac351cb 100644 --- a/common/cuda_hip/matrix/ell_kernels.hpp.inc +++ b/common/cuda_hip/matrix/ell_kernels.hpp.inc @@ -43,13 +43,14 @@ __device__ void spmv_kernel( acc::range b, OutputValueType* __restrict__ c, const size_type c_stride, Closure op) { + using arithmetic_type = typename a_accessor::arithmetic_type; const auto tidx = thread::get_thread_id_flat(); const decltype(tidx) column_id = blockIdx.y; if (num_thread_per_worker == 1) { // Specialize the num_thread_per_worker = 1. 
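// Host-side sketch of the accumulation rule introduced in this ELL SpMV patch:
// products are accumulated in the accessor's arithmetic_type (the highest
// precision among matrix, input and output value types) and cast back to
// OutputValueType only once at the end. The decltype below merely stands in
// for Ginkgo's highest_precision trait; this is an illustration, not the
// actual kernel.
template <typename MatrixValue, typename InputValue, typename OutputValue>
OutputValue mixed_precision_dot_sketch(const MatrixValue* a,
                                       const InputValue* b, int n)
{
    // accumulate in the widest of the three value types
    using arithmetic_type =
        decltype(MatrixValue{} * InputValue{} + OutputValue{});
    auto temp = arithmetic_type{};
    for (int i = 0; i < n; ++i) {
        temp += static_cast<arithmetic_type>(a[i]) *
                static_cast<arithmetic_type>(b[i]);
    }
    // a single rounding step back to the output precision
    return static_cast<OutputValue>(temp);
}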
It doesn't need the shared // memory, __syncthreads, and atomic_add if (tidx < num_rows) { - auto temp = zero(); + auto temp = zero(); for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; @@ -69,13 +70,13 @@ __device__ void spmv_kernel( const auto worker_id = tidx / num_rows; const auto step_size = num_worker_per_row * num_thread_per_worker; __shared__ uninitialized_array< - OutputValueType, default_block_size / num_thread_per_worker> + arithmetic_type, default_block_size / num_thread_per_worker> storage; if (idx_in_worker == 0) { storage[threadIdx.x] = 0; } __syncthreads(); - auto temp = zero(); + auto temp = zero(); for (size_type idx = worker_id * num_thread_per_worker + idx_in_worker; idx < num_stored_elements_per_row; idx += step_size) { @@ -114,7 +115,9 @@ __global__ __launch_bounds__(default_block_size) void spmv( spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [](const OutputValueType& x, const OutputValueType& y) { return x; }); + [](const auto& x, const OutputValueType& y) { + return static_cast(x); + }); } @@ -128,7 +131,8 @@ __global__ __launch_bounds__(default_block_size) void spmv( const OutputValueType* __restrict__ beta, OutputValueType* __restrict__ c, const size_type c_stride) { - const OutputValueType alpha_val = alpha(0); + using arithmetic_type = typename a_accessor::arithmetic_type; + const auto alpha_val = alpha(0); const OutputValueType beta_val = beta[0]; if (atomic) { // Because the atomic operation changes the values of c during @@ -139,16 +143,16 @@ __global__ __launch_bounds__(default_block_size) void spmv( spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [&alpha_val](const OutputValueType& x, const OutputValueType& y) { - return alpha_val * x; + [&alpha_val](const auto& x, const OutputValueType& y) { + return static_cast(alpha_val * x); }); } else { spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [&alpha_val, &beta_val](const OutputValueType& x, - const OutputValueType& y) { - return alpha_val * x + beta_val * y; + [&alpha_val, &beta_val](const auto& x, const OutputValueType& y) { + return static_cast( + alpha_val * x + static_cast(beta_val * y)); }); } } diff --git a/cuda/matrix/ell_kernels.cu b/cuda/matrix/ell_kernels.cu index 124a4deda75..7b20236827e 100644 --- a/cuda/matrix/ell_kernels.cu +++ b/cuda/matrix/ell_kernels.cu @@ -122,10 +122,12 @@ void abstract_spmv(syn::value_list, const matrix::Dense* alpha = nullptr, const matrix::Dense* beta = nullptr) { + using arithmetic_type = + highest_precision; using a_accessor = - gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; + gko::acc::reduced_row_major<1, arithmetic_type, const MatrixValueType>; using b_accessor = - gko::acc::reduced_row_major<2, OutputValueType, const InputValueType>; + gko::acc::reduced_row_major<2, arithmetic_type, const InputValueType>; const auto nrows = a->get_size()[0]; const auto stride = a->get_stride(); diff --git a/dpcpp/matrix/ell_kernels.dp.cpp b/dpcpp/matrix/ell_kernels.dp.cpp index 65fad771140..4817b9a5991 100644 --- a/dpcpp/matrix/ell_kernels.dp.cpp +++ b/dpcpp/matrix/ell_kernels.dp.cpp @@ -120,16 +120,17 @@ void spmv_kernel( const size_type stride, const size_type num_stored_elements_per_row, acc::range b, OutputValueType* __restrict__ c, const size_type c_stride, Closure op, sycl::nd_item<3> 
item_ct1, - uninitialized_array& storage) { + using arithmetic_type = typename a_accessor::arithmetic_type; const auto tidx = thread::get_thread_id_flat(item_ct1); const decltype(tidx) column_id = item_ct1.get_group(1); if (num_thread_per_worker == 1) { // Specialize the num_thread_per_worker = 1. It doesn't need the shared // memory, __syncthreads, and atomic_add if (tidx < num_rows) { - auto temp = zero(); + auto temp = zero(); for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; @@ -150,11 +151,11 @@ void spmv_kernel( const auto step_size = num_worker_per_row * num_thread_per_worker; if (runnable && idx_in_worker == 0) { - storage[item_ct1.get_local_id(2)] = 0; + storage[item_ct1.get_local_id(2)] = zero(); } item_ct1.barrier(sycl::access::fence_space::local_space); - auto temp = zero(); + auto temp = zero(); if (runnable) { for (size_type idx = worker_id * num_thread_per_worker + idx_in_worker; @@ -193,13 +194,15 @@ void spmv( const size_type stride, const size_type num_stored_elements_per_row, acc::range b, OutputValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1, - uninitialized_array& storage) { spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [](const OutputValueType& x, const OutputValueType& y) { return x; }, + [](const auto& x, const OutputValueType& y) { + return static_cast(x); + }, item_ct1, storage); } @@ -214,7 +217,7 @@ void spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, { queue->submit([&](sycl::handler& cgh) { sycl::accessor< - uninitialized_array, 0, sycl::access_mode::read_write, sycl::access::target::local> storage_acc_ct1(cgh); @@ -239,10 +242,11 @@ void spmv( const size_type num_stored_elements_per_row, acc::range b, const OutputValueType* __restrict__ beta, OutputValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1, - uninitialized_array& storage) { - const OutputValueType alpha_val = alpha(0); + using arithmetic_type = typename a_accessor::arithmetic_type; + const auto alpha_val = alpha(0); const OutputValueType beta_val = beta[0]; if (atomic) { // Because the atomic operation changes the values of c during @@ -253,17 +257,17 @@ void spmv( spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [&alpha_val](const OutputValueType& x, const OutputValueType& y) { - return alpha_val * x; + [&alpha_val](const auto& x, const OutputValueType& y) { + return static_cast(alpha_val * x); }, item_ct1, storage); } else { spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [&alpha_val, &beta_val](const OutputValueType& x, - const OutputValueType& y) { - return alpha_val * x + beta_val * y; + [&alpha_val, &beta_val](const auto& x, const OutputValueType& y) { + return static_cast( + alpha_val * x + static_cast(beta_val * y)); }, item_ct1, storage); } @@ -281,7 +285,7 @@ void spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, { queue->submit([&](sycl::handler& cgh) { sycl::accessor< - uninitialized_array, 0, sycl::access_mode::read_write, sycl::access::target::local> storage_acc_ct1(cgh); @@ -316,10 +320,12 @@ void abstract_spmv(syn::value_list, const matrix::Dense* alpha = nullptr, const matrix::Dense* beta = nullptr) { + using arithmetic_type = + highest_precision; using a_accessor = - gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; 
+ gko::acc::reduced_row_major<1, arithmetic_type, const MatrixValueType>; using b_accessor = - gko::acc::reduced_row_major<2, OutputValueType, const InputValueType>; + gko::acc::reduced_row_major<2, arithmetic_type, const InputValueType>; const auto nrows = a->get_size()[0]; const auto stride = a->get_stride(); diff --git a/hip/matrix/ell_kernels.hip.cpp b/hip/matrix/ell_kernels.hip.cpp index db9d5aa11bb..1567548463f 100644 --- a/hip/matrix/ell_kernels.hip.cpp +++ b/hip/matrix/ell_kernels.hip.cpp @@ -125,10 +125,12 @@ void abstract_spmv(syn::value_list, const matrix::Dense* alpha = nullptr, const matrix::Dense* beta = nullptr) { + using arithmetic_type = + highest_precision; using a_accessor = - acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; + acc::reduced_row_major<1, arithmetic_type, const MatrixValueType>; using b_accessor = - acc::reduced_row_major<2, OutputValueType, const InputValueType>; + acc::reduced_row_major<2, arithmetic_type, const InputValueType>; const auto nrows = a->get_size()[0]; const auto stride = a->get_stride(); From 4bd96dd57efbd5fd177f6a500c0e577a215b3bc7 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 10 May 2023 19:30:29 +0200 Subject: [PATCH 307/583] omit .on(exec) in factory factory parameters --- include/ginkgo/core/base/lin_op.hpp | 12 +- include/ginkgo/core/preconditioner/ic.hpp | 109 +++++++++++++-- include/ginkgo/core/preconditioner/ilu.hpp | 151 +++++++++++++++++++-- include/ginkgo/core/solver/bicg.hpp | 27 +--- include/ginkgo/core/solver/bicgstab.hpp | 26 +--- include/ginkgo/core/solver/cb_gmres.hpp | 27 +--- include/ginkgo/core/solver/cg.hpp | 27 +--- include/ginkgo/core/solver/cgs.hpp | 27 +--- include/ginkgo/core/solver/direct.hpp | 53 +++++++- include/ginkgo/core/solver/fcg.hpp | 27 +--- include/ginkgo/core/solver/gcr.hpp | 30 +--- include/ginkgo/core/solver/gmres.hpp | 35 ++--- include/ginkgo/core/solver/idr.hpp | 24 +--- include/ginkgo/core/solver/ir.hpp | 73 ++++++++-- include/ginkgo/core/solver/multigrid.hpp | 12 +- include/ginkgo/core/solver/solver_base.hpp | 149 ++++++++++++++++++++ 16 files changed, 552 insertions(+), 257 deletions(-) diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index c06c43bbb6e..20d7771822f 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -1119,8 +1119,7 @@ public: \ mutable _name{__VA_ARGS__}; \ \ template \ - auto with_##_name(Args&&... _value) \ - const->const std::decay_t& \ + auto with_##_name(Args&&... _value)->std::decay_t& \ { \ using type = decltype(this->_name); \ this->_name = type{std::forward(_value)...}; \ @@ -1170,8 +1169,7 @@ public: \ mutable _name{__VA_ARGS__}; \ \ template \ - auto with_##_name(Args&&... _value) \ - const->const std::decay_t& \ + auto with_##_name(Args&&... _value)->std::decay_t& \ { \ GKO_NOT_IMPLEMENTED; \ return *this; \ @@ -1184,8 +1182,7 @@ public: \ mutable _name{_default}; \ \ template \ - auto with_##_name(Arg&& _value) \ - const->const std::decay_t& \ + auto with_##_name(Arg&& _value)->std::decay_t& \ { \ using type = decltype(this->_name); \ this->_name = type{std::forward(_value)}; \ @@ -1199,8 +1196,7 @@ public: \ mutable _name{__VA_ARGS__}; \ \ template \ - auto with_##_name(Args&&... _value) \ - const->const std::decay_t& \ + auto with_##_name(Args&&... 
_value)->std::decay_t& \ { \ using type = decltype(this->_name); \ this->_name = type{std::forward(_value)...}; \ diff --git a/include/ginkgo/core/preconditioner/ic.hpp b/include/ginkgo/core/preconditioner/ic.hpp index aa19a004dc1..cb00119582a 100644 --- a/include/ginkgo/core/preconditioner/ic.hpp +++ b/include/ginkgo/core/preconditioner/ic.hpp @@ -119,19 +119,106 @@ class Ic : public EnableLinOp>, public Transposable { using index_type = IndexType; using transposed_type = Ic; - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { + class Factory; + + struct parameters_type + : public enable_parameters_type { /** * Factory for the L solver */ - std::shared_ptr - GKO_FACTORY_PARAMETER_SCALAR(l_solver_factory, nullptr); + std::shared_ptr + l_solver_factory{}; /** * Factory for the factorization */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - factorization_factory, nullptr); + std::shared_ptr factorization_factory{}; + + [[deprecated("use with_l_solver instead")]] parameters_type& + with_l_solver_factory( + std::shared_ptr factory) + { + return with_l_solver(std::move(factory)); + } + + parameters_type& with_l_solver( + std::shared_ptr factory) + { + this->l_solver_generator = + [factory](std::shared_ptr) + -> std::shared_ptr { + return factory; + }; + return *this; + } + + template ().on( + std::shared_ptr{}))> + parameters_type& with_l_solver(SolverParameters parameters) + { + this->l_solver_generator = + [parameters](std::shared_ptr exec) + -> std::shared_ptr { + return parameters.on(exec); + }; + return *this; + } + + [[deprecated("use with_factorization instead")]] parameters_type& + with_factorization_factory(std::shared_ptr factory) + { + return with_factorization(std::move(factory)); + } + + parameters_type& with_factorization( + std::shared_ptr factory) + { + this->factorization_generator = + [factory](std::shared_ptr) + -> std::shared_ptr { return factory; }; + return *this; + } + + template < + typename FactorizationParameters, + typename = decltype(std::declval().on( + std::shared_ptr{}))> + parameters_type& with_factorization(FactorizationParameters parameters) + { + this->factorization_generator = + [parameters](std::shared_ptr exec) + -> std::shared_ptr { + return parameters.on(exec); + }; + return *this; + } + + /** + * + */ + std::unique_ptr on(std::shared_ptr exec) const + { + auto parameters_copy = *this; + if (l_solver_generator) { + parameters_copy.l_solver_factory = l_solver_generator(exec); + } + if (factorization_generator) { + parameters_copy.factorization_factory = + factorization_generator(exec); + } + return parameters_copy + .enable_parameters_type::on(exec); + } + + private: + std::function( + std::shared_ptr)> + l_solver_generator; + + std::function( + std::shared_ptr)> + factorization_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Ic, parameters, Factory); @@ -365,12 +452,10 @@ class Ic : public EnableLinOp>, public Transposable { static_cast(mtx->get_size()[0])}; return SolverType::build() - .with_criteria(gko::stop::Iteration::build() - .with_max_iters(default_max_iters) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(default_reduce_residual) - .on(exec)) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(default_max_iters), + gko::stop::ResidualNorm::build() + .with_reduction_factor(default_reduce_residual)) .on(exec) ->generate(mtx); } diff --git a/include/ginkgo/core/preconditioner/ilu.hpp b/include/ginkgo/core/preconditioner/ilu.hpp index 7db9d19c7c2..bd6d665b009 100644 --- 
a/include/ginkgo/core/preconditioner/ilu.hpp +++ b/include/ginkgo/core/preconditioner/ilu.hpp @@ -131,25 +131,150 @@ class Ilu : public EnableLinOp< Ilu; - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { + class Factory; + + struct parameters_type + : public enable_parameters_type { /** * Factory for the L solver */ - std::shared_ptr - GKO_FACTORY_PARAMETER_SCALAR(l_solver_factory, nullptr); + std::shared_ptr + l_solver_factory{}; /** * Factory for the U solver */ - std::shared_ptr - GKO_FACTORY_PARAMETER_SCALAR(u_solver_factory, nullptr); + std::shared_ptr + u_solver_factory{}; /** * Factory for the factorization */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - factorization_factory, nullptr); + std::shared_ptr factorization_factory{}; + + [[deprecated("use with_l_solver instead")]] parameters_type& + with_l_solver_factory( + std::shared_ptr factory) + { + return with_l_solver(std::move(factory)); + } + + parameters_type& with_l_solver( + std::shared_ptr factory) + { + this->l_solver_generator = + [factory](std::shared_ptr) + -> std::shared_ptr { + return factory; + }; + return *this; + } + + template ().on( + std::shared_ptr{}))> + parameters_type& with_l_solver(SolverParameters parameters) + { + this->l_solver_generator = + [parameters](std::shared_ptr exec) + -> std::shared_ptr { + return parameters.on(exec); + }; + return *this; + } + + [[deprecated("use with_u_solver instead")]] parameters_type& + with_u_solver_factory( + std::shared_ptr factory) + { + return with_u_solver(std::move(factory)); + } + + parameters_type& with_u_solver( + std::shared_ptr factory) + { + this->u_solver_generator = + [factory](std::shared_ptr) + -> std::shared_ptr { + return factory; + }; + return *this; + } + + template ().on( + std::shared_ptr{}))> + parameters_type& with_u_solver(SolverParameters parameters) + { + this->u_solver_generator = + [parameters](std::shared_ptr exec) + -> std::shared_ptr { + return parameters.on(exec); + }; + return *this; + } + + [[deprecated("use with_factorization instead")]] parameters_type& + with_factorization_factory(std::shared_ptr factory) + { + return with_factorization(std::move(factory)); + } + + parameters_type& with_factorization( + std::shared_ptr factory) + { + this->factorization_generator = + [factory](std::shared_ptr) + -> std::shared_ptr { return factory; }; + return *this; + } + + template < + typename FactorizationParameters, + typename = decltype(std::declval().on( + std::shared_ptr{}))> + parameters_type& with_factorization(FactorizationParameters parameters) + { + this->factorization_generator = + [parameters](std::shared_ptr exec) + -> std::shared_ptr { + return parameters.on(exec); + }; + return *this; + } + + /** + * + */ + std::unique_ptr on(std::shared_ptr exec) const + { + auto parameters_copy = *this; + if (l_solver_generator) { + parameters_copy.l_solver_factory = l_solver_generator(exec); + } + if (u_solver_generator) { + parameters_copy.u_solver_factory = u_solver_generator(exec); + } + if (factorization_generator) { + parameters_copy.factorization_factory = + factorization_generator(exec); + } + return parameters_copy + .enable_parameters_type::on(exec); + } + + private: + std::function( + std::shared_ptr)> + l_solver_generator; + + std::function( + std::shared_ptr)> + u_solver_generator; + + std::function( + std::shared_ptr)> + factorization_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Ilu, parameters, Factory); @@ -393,12 +518,10 @@ class Ilu : public EnableLinOp< static_cast(mtx->get_size()[0])}; return SolverType::build() - 
.with_criteria(gko::stop::Iteration::build() - .with_max_iters(default_max_iters) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(default_reduce_residual) - .on(exec)) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(default_max_iters), + gko::stop::ResidualNorm::build() + .with_reduction_factor(default_reduce_residual)) .on(exec) ->generate(mtx); } diff --git a/include/ginkgo/core/solver/bicg.hpp b/include/ginkgo/core/solver/bicg.hpp index c7b47a0e807..205be85df6c 100644 --- a/include/ginkgo/core/solver/bicg.hpp +++ b/include/ginkgo/core/solver/bicg.hpp @@ -99,27 +99,12 @@ class Bicg */ bool apply_uses_initial_guess() const override { return true; } - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); - - /** - * Preconditioner factory. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - preconditioner, nullptr); - - /** - * Already generated preconditioner. If one is provided, the factory - * `preconditioner` will be ignored. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_preconditioner, nullptr); - }; + class Factory; + + struct parameters_type + : enable_preconditioned_iterative_solver_factory_parameters< + parameters_type, Factory> {}; + GKO_ENABLE_LIN_OP_FACTORY(Bicg, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/bicgstab.hpp b/include/ginkgo/core/solver/bicgstab.hpp index 214e669b2ff..58d76c5e0df 100644 --- a/include/ginkgo/core/solver/bicgstab.hpp +++ b/include/ginkgo/core/solver/bicgstab.hpp @@ -98,27 +98,11 @@ class Bicgstab */ bool apply_uses_initial_guess() const override { return true; } - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); - - /** - * Preconditioner factory. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - preconditioner, nullptr); - - /** - * Already generated preconditioner. If one is provided, the factory - * `preconditioner` will be ignored. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_preconditioner, nullptr); - }; + class Factory; + struct parameters_type + : enable_preconditioned_iterative_solver_factory_parameters< + parameters_type, Factory> {}; + GKO_ENABLE_LIN_OP_FACTORY(Bicgstab, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/cb_gmres.hpp b/include/ginkgo/core/solver/cb_gmres.hpp index a2dbb1efce1..9bf4cf91a76 100644 --- a/include/ginkgo/core/solver/cb_gmres.hpp +++ b/include/ginkgo/core/solver/cb_gmres.hpp @@ -153,38 +153,23 @@ class CbGmres : public EnableLinOp>, return parameters_.storage_precision; } - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { + class Factory; + + struct parameters_type + : enable_preconditioned_iterative_solver_factory_parameters< + parameters_type, Factory> { /** * Determines which storage type is used. */ cb_gmres::storage_precision GKO_FACTORY_PARAMETER_SCALAR( storage_precision, cb_gmres::storage_precision::reduce1); - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); - - /** - * Preconditioner factory. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - preconditioner, nullptr); - - /** - * Already generated preconditioner. If one is provided, the factory - * `preconditioner` will be ignored. 
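// Usage sketch of the interface these parameter changes enable (a minimal
// example under assumptions: `exec` and `A` are caller-provided, the numeric
// values are illustrative): nested criteria and preconditioner parameters no
// longer need their own .on(exec), only the outermost builder is bound to an
// executor.
#include <memory>

#include <ginkgo/ginkgo.hpp>

std::unique_ptr<gko::solver::Cg<double>> make_cg_sketch(
    std::shared_ptr<const gko::Executor> exec,
    std::shared_ptr<const gko::LinOp> A)
{
    return gko::solver::Cg<double>::build()
        .with_criteria(
            gko::stop::Iteration::build().with_max_iters(100u),
            gko::stop::ResidualNorm<double>::build().with_reduction_factor(
                1e-8))
        .with_preconditioner(gko::preconditioner::Jacobi<double>::build())
        .on(exec)
        ->generate(A);
}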
- */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_preconditioner, nullptr); - /** * Krylov dimension factory. */ size_type GKO_FACTORY_PARAMETER_SCALAR(krylov_dim, 100u); }; + GKO_ENABLE_LIN_OP_FACTORY(CbGmres, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/cg.hpp b/include/ginkgo/core/solver/cg.hpp index bc0861cf270..c0fff29fedd 100644 --- a/include/ginkgo/core/solver/cg.hpp +++ b/include/ginkgo/core/solver/cg.hpp @@ -92,27 +92,12 @@ class Cg : public EnableLinOp>, */ bool apply_uses_initial_guess() const override { return true; } - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); - - /** - * Preconditioner factory. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - preconditioner, nullptr); - - /** - * Already generated preconditioner. If one is provided, the factory - * `preconditioner` will be ignored. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_preconditioner, nullptr); - }; + class Factory; + + struct parameters_type + : enable_preconditioned_iterative_solver_factory_parameters< + parameters_type, Factory> {}; + GKO_ENABLE_LIN_OP_FACTORY(Cg, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/cgs.hpp b/include/ginkgo/core/solver/cgs.hpp index 22f81d8a292..57a834b0ead 100644 --- a/include/ginkgo/core/solver/cgs.hpp +++ b/include/ginkgo/core/solver/cgs.hpp @@ -90,27 +90,12 @@ class Cgs */ bool apply_uses_initial_guess() const override { return true; } - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); - - /** - * Preconditioner factory. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - preconditioner, nullptr); - - /** - * Already generated preconditioner. If one is provided, the factory - * `preconditioner` will be ignored. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_preconditioner, nullptr); - }; + class Factory; + + struct parameters_type + : enable_preconditioned_iterative_solver_factory_parameters< + parameters_type, Factory> {}; + GKO_ENABLE_LIN_OP_FACTORY(Cgs, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/direct.hpp b/include/ginkgo/core/solver/direct.hpp index 4a9a69731be..f66546cd2ec 100644 --- a/include/ginkgo/core/solver/direct.hpp +++ b/include/ginkgo/core/solver/direct.hpp @@ -74,8 +74,9 @@ class Direct : public EnableLinOp>, std::unique_ptr conj_transpose() const override; - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { + class Factory; + + struct parameters_type : enable_parameters_type { /** * Number of right hand sides. * @@ -86,8 +87,52 @@ class Direct : public EnableLinOp>, gko::size_type GKO_FACTORY_PARAMETER_SCALAR(num_rhs, 1u); /** The factorization factory to use for generating the factors. 
*/ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - factorization, nullptr); + std::shared_ptr factorization; + + /** + * + */ + parameters_type& with_factorization( + std::shared_ptr factorization) + { + this->factorization_generator = + [factorization](std::shared_ptr) + -> std::shared_ptr { + return factorization; + }; + return *this; + } + + template < + typename FactorizationParameters, + typename = decltype(std::declval().on( + std::shared_ptr{}))> + parameters_type& with_factorization( + FactorizationParameters factorization_parameters) + { + this->factorization_generator = + [factorization_parameters](std::shared_ptr exec) + -> std::shared_ptr { + return factorization_parameters.on(exec); + }; + return *this; + } + + /** + * + */ + std::unique_ptr on(std::shared_ptr exec) const + { + auto parameters_copy = *this; + parameters_copy.factorization = factorization_generator(exec); + return parameters_copy + .enable_parameters_type::on(exec); + } + + private: + std::function( + std::shared_ptr)> + factorization_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Direct, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/fcg.hpp b/include/ginkgo/core/solver/fcg.hpp index cad7a29fc27..b6715f07512 100644 --- a/include/ginkgo/core/solver/fcg.hpp +++ b/include/ginkgo/core/solver/fcg.hpp @@ -98,27 +98,12 @@ class Fcg */ bool apply_uses_initial_guess() const override { return true; } - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); - - /** - * Preconditioner factory. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - preconditioner, nullptr); - - /** - * Already generated preconditioner. If one is provided, the factory - * `preconditioner` will be ignored. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_preconditioner, nullptr); - }; + class Factory; + + struct parameters_type + : enable_preconditioned_iterative_solver_factory_parameters< + parameters_type, Factory> {}; + GKO_ENABLE_LIN_OP_FACTORY(Fcg, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/gcr.hpp b/include/ginkgo/core/solver/gcr.hpp index fdc95d30c8f..8dc68e6e33d 100644 --- a/include/ginkgo/core/solver/gcr.hpp +++ b/include/ginkgo/core/solver/gcr.hpp @@ -108,30 +108,12 @@ class Gcr */ void set_krylov_dim(size_type other) { parameters_.krylov_dim = other; } - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); - - /** - * Preconditioner factory. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - preconditioner, nullptr); - - /** - * Already generated preconditioner. If one is provided, the factory - * `preconditioner` will be ignored. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_preconditioner, nullptr); - - /** - * Krylov dimension factory. - */ + class Factory; + + struct parameters_type + : enable_preconditioned_iterative_solver_factory_parameters< + parameters_type, Factory> { + /** Krylov subspace dimension/restart value. 
*/ size_type GKO_FACTORY_PARAMETER_SCALAR(krylov_dim, 0u); }; GKO_ENABLE_LIN_OP_FACTORY(Gcr, parameters, Factory); diff --git a/include/ginkgo/core/solver/gmres.hpp b/include/ginkgo/core/solver/gmres.hpp index d7d0f57a8a4..0ea056c9333 100644 --- a/include/ginkgo/core/solver/gmres.hpp +++ b/include/ginkgo/core/solver/gmres.hpp @@ -109,35 +109,16 @@ class Gmres */ void set_krylov_dim(size_type other) { parameters_.krylov_dim = other; } - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); - - /** - * Preconditioner factory. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - preconditioner, nullptr); - - /** - * Already generated preconditioner. If one is provided, the factory - * `preconditioner` will be ignored. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_preconditioner, nullptr); - - /** - * Krylov dimension factory. - */ + + class Factory; + + struct parameters_type + : enable_preconditioned_iterative_solver_factory_parameters< + parameters_type, Factory> { + /** Krylov subspace dimension/restart value. */ size_type GKO_FACTORY_PARAMETER_SCALAR(krylov_dim, 0u); - /** - * Flexible GMRES - */ + /** Flexible GMRES */ bool GKO_FACTORY_PARAMETER_SCALAR(flexible, false); }; GKO_ENABLE_LIN_OP_FACTORY(Gmres, parameters, Factory); diff --git a/include/ginkgo/core/solver/idr.hpp b/include/ginkgo/core/solver/idr.hpp index fc677f33171..a7b8af31bf4 100644 --- a/include/ginkgo/core/solver/idr.hpp +++ b/include/ginkgo/core/solver/idr.hpp @@ -180,27 +180,11 @@ class Idr parameters_.complex_subspace = other; } - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); - - /** - * Preconditioner factory. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - preconditioner, nullptr); - - /** - * Already generated preconditioner. If one is provided, the factory - * `preconditioner` will be ignored. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_preconditioner, nullptr); + class Factory; + struct parameters_type + : enable_preconditioned_iterative_solver_factory_parameters< + parameters_type, Factory> { /** * Dimension of the subspace S. Determines how many intermediate * residuals are computed in each iteration. diff --git a/include/ginkgo/core/solver/ir.hpp b/include/ginkgo/core/solver/ir.hpp index c5c69c1fb67..792a0cdcfc6 100644 --- a/include/ginkgo/core/solver/ir.hpp +++ b/include/ginkgo/core/solver/ir.hpp @@ -177,26 +177,20 @@ class Ir : public EnableLinOp>, */ Ir(Ir&&); - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); + class Factory; + struct parameters_type + : enable_iterative_solver_factory_parameters { /** * Inner solver factory. */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - solver, nullptr); + std::shared_ptr solver{}; /** * Already generated solver. If one is provided, the factory `solver` * will be ignored. 
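// Sketch of an IR smoother configured through the new deferred parameters, a
// simplified variant of the build_smoother helper touched in this patch;
// executor, inner solver choice and the numeric values are illustrative
// assumptions, not the library defaults.
#include <memory>

#include <ginkgo/ginkgo.hpp>

std::shared_ptr<gko::LinOpFactory> make_smoother_sketch(
    std::shared_ptr<const gko::Executor> exec)
{
    return gko::solver::Ir<double>::build()
        // the inner solver is passed as an un-instantiated builder and only
        // bound to the executor by the outer .on(exec)
        .with_solver(gko::preconditioner::Jacobi<double>::build()
                         .with_max_block_size(1u))
        .with_relaxation_factor(0.9)
        .with_criteria(gko::stop::Iteration::build().with_max_iters(2u))
        .on(exec);
}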
*/ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_solver, nullptr); + std::shared_ptr generated_solver{}; /** * Relaxation factor for Richardson iteration @@ -210,6 +204,57 @@ class Ir : public EnableLinOp>, */ initial_guess_mode GKO_FACTORY_PARAMETER_SCALAR( default_initial_guess, initial_guess_mode::provided); + + /** + * + */ + parameters_type& with_solver(std::shared_ptr solver) + { + this->solver_generator = [solver](std::shared_ptr) + -> std::shared_ptr { return solver; }; + return *this; + } + + template ().on( + std::shared_ptr{}))> + parameters_type& with_solver(SolverParameters solver_parameters) + { + this->solver_generator = + [solver_parameters](std::shared_ptr exec) + -> std::shared_ptr { + return solver_parameters.on(exec); + }; + return *this; + } + + /** + * + */ + parameters_type& with_generated_solver( + std::shared_ptr generated_solver) + { + this->generated_solver = std::move(generated_solver); + return *this; + } + + /** + * + */ + std::unique_ptr on(std::shared_ptr exec) const + { + auto parameters_copy = *this; + if (solver_generator) { + parameters_copy.solver = solver_generator(exec); + } + return parameters_copy.enable_iterative_solver_factory_parameters< + parameters_type, Factory>::on(exec); + } + + private: + std::function( + std::shared_ptr)> + solver_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Ir, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); @@ -319,8 +364,7 @@ auto build_smoother(std::shared_ptr factory, return Ir::build() .with_solver(factory) .with_relaxation_factor(relaxation_factor) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(iteration).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(iteration)) .on(exec); } @@ -344,8 +388,7 @@ auto build_smoother(std::shared_ptr solver, return Ir::build() .with_generated_solver(solver) .with_relaxation_factor(relaxation_factor) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(iteration).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(iteration)) .on(exec); } diff --git a/include/ginkgo/core/solver/multigrid.hpp b/include/ginkgo/core/solver/multigrid.hpp index 2d04a889445..c78e54a773d 100644 --- a/include/ginkgo/core/solver/multigrid.hpp +++ b/include/ginkgo/core/solver/multigrid.hpp @@ -215,14 +215,12 @@ class Multigrid : public EnableLinOp, */ void set_cycle(multigrid::cycle cycle) { parameters_.cycle = cycle; } - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); + class Factory; + + struct parameters_type + : public enable_iterative_solver_factory_parameters { /** * MultigridLevel Factory list */ diff --git a/include/ginkgo/core/solver/solver_base.hpp b/include/ginkgo/core/solver/solver_base.hpp index 53909337554..f527978c200 100644 --- a/include/ginkgo/core/solver/solver_base.hpp +++ b/include/ginkgo/core/solver/solver_base.hpp @@ -35,11 +35,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include +#include #include #include #include @@ -859,6 +861,153 @@ class EnablePreconditionedIterativeSolver }; +struct iterative_solver_factory_parameters { + /** + * Stopping criteria to be used by the solver. 
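// Sketch of what the deferred criteria handling in this hunk makes possible:
// with_criteria can mix an already instantiated criterion factory with a
// builder that is only bound to the executor in the final .on(exec). Solver,
// criteria and values here are illustrative assumptions.
#include <chrono>
#include <memory>

#include <ginkgo/ginkgo.hpp>

std::shared_ptr<gko::LinOpFactory> make_bicgstab_sketch(
    std::shared_ptr<const gko::Executor> exec)
{
    // an already instantiated criterion factory ...
    std::shared_ptr<const gko::stop::CriterionFactory> time_limit =
        gko::stop::Time::build()
            .with_time_limit(std::chrono::seconds(10))
            .on(exec);
    // ... mixed with a builder that is instantiated only by the final .on(exec)
    return gko::solver::Bicgstab<double>::build()
        .with_criteria(time_limit,
                       gko::stop::Iteration::build().with_max_iters(1000u))
        .on(exec);
}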
+ */ + std::vector> criteria{}; +}; + + +template +struct enable_iterative_solver_factory_parameters + : enable_parameters_type, + iterative_solver_factory_parameters { + /** + * Provides stopping criteria via stop::CriterionFactory instances to be + * used by the iterative solver in a fluent interface. + */ + template + Parameters& with_criteria(Args... value) + { + this->criterion_generators = {build_generator(std::move(value))...}; + return *self(); + } + + /** + * @copydoc enable_solver_factory_parameters::on + * + * @note This variant instantiates stopping criteria that were provided + * without calling `.on(exec)` before generating the factory. + */ + std::unique_ptr on(std::shared_ptr exec) const + { + auto copy = *self(); + copy.criteria.clear(); + for (auto& generator : criterion_generators) { + copy.criteria.push_back(generator(exec)); + } + auto factory = + copy.enable_parameters_type::on(exec); + return factory; + } + +private: + GKO_ENABLE_SELF(Parameters); + + std::function( + std::shared_ptr)> + build_generator(std::shared_ptr criterion) + { + return + [criterion](std::shared_ptr) { return criterion; }; + } + + template ().on( + std::shared_ptr{}))> + std::function( + std::shared_ptr)> + build_generator(CriterionParameters criterion_parameters) + { + return [criterion_parameters](std::shared_ptr exec) { + return criterion_parameters.on(exec); + }; + } + + std::vector( + std::shared_ptr)>> + criterion_generators; +}; + + +struct preconditioned_iterative_solver_factory_parameters { + /** + * The preconditioner to be used by the iterative solver. By default, no + * preconditioner is used. + */ + std::shared_ptr preconditioner{nullptr}; + + /** + * Already generated preconditioner. If one is provided, the factory + * `preconditioner` will be ignored. 
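// Sketch contrasting the two preconditioner entry points declared here (exec
// and A are assumed to be caller-provided, values are illustrative):
// with_preconditioner takes a factory or factory parameters, while
// with_generated_preconditioner takes a preconditioner already generated for
// the system matrix, so the solver skips generation entirely.
#include <memory>

#include <ginkgo/ginkgo.hpp>

std::unique_ptr<gko::solver::Cg<double>> make_cg_with_generated_sketch(
    std::shared_ptr<const gko::Executor> exec,
    std::shared_ptr<const gko::LinOp> A)
{
    // generate the preconditioner up front for the system matrix A ...
    std::shared_ptr<const gko::LinOp> jacobi =
        gko::preconditioner::Jacobi<double>::build().on(exec)->generate(A);
    // ... and hand the generated operator (not a factory) to the solver
    return gko::solver::Cg<double>::build()
        .with_generated_preconditioner(jacobi)
        .with_criteria(gko::stop::Iteration::build().with_max_iters(50u))
        .on(exec)
        ->generate(A);
}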
+ */ + std::shared_ptr generated_preconditioner{nullptr}; +}; + + +template +struct enable_preconditioned_iterative_solver_factory_parameters + : enable_iterative_solver_factory_parameters, + preconditioned_iterative_solver_factory_parameters { + /** + * + */ + Parameters& with_preconditioner( + std::shared_ptr preconditioner) + { + this->preconditioner_generator = + [preconditioner](std::shared_ptr) + -> std::shared_ptr { return preconditioner; }; + return *self(); + } + + template ().on( + std::shared_ptr{}))> + Parameters& with_preconditioner( + PreconditionerParameters preconditioner_parameters) + { + this->preconditioner_generator = + [preconditioner_parameters](std::shared_ptr exec) + -> std::shared_ptr { + return preconditioner_parameters.on(exec); + }; + return *self(); + } + + /** + * + */ + Parameters& with_generated_preconditioner( + std::shared_ptr generated_preconditioner) + { + this->generated_preconditioner = std::move(generated_preconditioner); + return *self(); + } + + /** + * + */ + std::unique_ptr on(std::shared_ptr exec) const + { + auto parameters_copy = *self(); + if (preconditioner_generator) { + parameters_copy.preconditioner = preconditioner_generator(exec); + } + return parameters_copy.enable_iterative_solver_factory_parameters< + Parameters, Factory>::on(exec); + } + +private: + GKO_ENABLE_SELF(Parameters); + + std::function( + std::shared_ptr)> + preconditioner_generator; +}; + + } // namespace solver } // namespace gko From dd10f5f96ad600b28bcedaef04d61bdcb57f4fbd Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 11 May 2023 12:21:55 +0200 Subject: [PATCH 308/583] simplify deferred factory creation --- include/ginkgo/core/base/abstract_factory.hpp | 54 +++++++++++ include/ginkgo/core/preconditioner/ic.hpp | 61 +++---------- include/ginkgo/core/preconditioner/ilu.hpp | 91 ++++--------------- include/ginkgo/core/solver/ir.hpp | 25 +---- include/ginkgo/core/solver/solver_base.hpp | 55 ++--------- 5 files changed, 100 insertions(+), 186 deletions(-) diff --git a/include/ginkgo/core/base/abstract_factory.hpp b/include/ginkgo/core/base/abstract_factory.hpp index 1c5043c186f..3609a3f1205 100644 --- a/include/ginkgo/core/base/abstract_factory.hpp +++ b/include/ginkgo/core/base/abstract_factory.hpp @@ -274,6 +274,60 @@ class enable_parameters_type { }; +template +class deferred_factory_parameter { +public: + deferred_factory_parameter() = default; + + template >::value>* = nullptr> + deferred_factory_parameter(std::shared_ptr factory) + { + generator_ = + [factory = std::shared_ptr(std::move(factory))]( + std::shared_ptr) { return factory; }; + } + + template >::value>* = nullptr> + deferred_factory_parameter( + std::unique_ptr factory) + { + generator_ = + [factory = std::shared_ptr(std::move(factory))]( + std::shared_ptr) { return factory; }; + } + + template ().on( + std::shared_ptr{}))> + deferred_factory_parameter(ParametersType parameters) + { + generator_ = [parameters](std::shared_ptr exec) + -> std::shared_ptr { + return parameters.on(exec); + }; + } + + std::shared_ptr on( + std::shared_ptr exec) const + { + return generator_(exec); + } + + explicit operator bool() const { return bool(generator_); } + +private: + std::function( + std::shared_ptr)> + generator_; +}; + + } // namespace gko diff --git a/include/ginkgo/core/preconditioner/ic.hpp b/include/ginkgo/core/preconditioner/ic.hpp index cb00119582a..ed5063d403b 100644 --- a/include/ginkgo/core/preconditioner/ic.hpp +++ b/include/ginkgo/core/preconditioner/ic.hpp @@ -136,61 +136,29 @@ class Ic : 
public EnableLinOp>, public Transposable { [[deprecated("use with_l_solver instead")]] parameters_type& with_l_solver_factory( - std::shared_ptr factory) + deferred_factory_parameter solver) { - return with_l_solver(std::move(factory)); + return with_l_solver(std::move(solver)); } parameters_type& with_l_solver( - std::shared_ptr factory) + deferred_factory_parameter solver) { - this->l_solver_generator = - [factory](std::shared_ptr) - -> std::shared_ptr { - return factory; - }; - return *this; - } - - template ().on( - std::shared_ptr{}))> - parameters_type& with_l_solver(SolverParameters parameters) - { - this->l_solver_generator = - [parameters](std::shared_ptr exec) - -> std::shared_ptr { - return parameters.on(exec); - }; + this->l_solver_generator = std::move(solver); return *this; } [[deprecated("use with_factorization instead")]] parameters_type& - with_factorization_factory(std::shared_ptr factory) + with_factorization_factory( + deferred_factory_parameter factorization) { - return with_factorization(std::move(factory)); + return with_factorization(std::move(factorization)); } parameters_type& with_factorization( - std::shared_ptr factory) - { - this->factorization_generator = - [factory](std::shared_ptr) - -> std::shared_ptr { return factory; }; - return *this; - } - - template < - typename FactorizationParameters, - typename = decltype(std::declval().on( - std::shared_ptr{}))> - parameters_type& with_factorization(FactorizationParameters parameters) + deferred_factory_parameter factorization) { - this->factorization_generator = - [parameters](std::shared_ptr exec) - -> std::shared_ptr { - return parameters.on(exec); - }; + this->factorization_generator = std::move(factorization); return *this; } @@ -201,24 +169,21 @@ class Ic : public EnableLinOp>, public Transposable { { auto parameters_copy = *this; if (l_solver_generator) { - parameters_copy.l_solver_factory = l_solver_generator(exec); + parameters_copy.l_solver_factory = l_solver_generator.on(exec); } if (factorization_generator) { parameters_copy.factorization_factory = - factorization_generator(exec); + factorization_generator.on(exec); } return parameters_copy .enable_parameters_type::on(exec); } private: - std::function( - std::shared_ptr)> + deferred_factory_parameter l_solver_generator; - std::function( - std::shared_ptr)> - factorization_generator; + deferred_factory_parameter factorization_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Ic, parameters, Factory); diff --git a/include/ginkgo/core/preconditioner/ilu.hpp b/include/ginkgo/core/preconditioner/ilu.hpp index bd6d665b009..f4f8d0abd5b 100644 --- a/include/ginkgo/core/preconditioner/ilu.hpp +++ b/include/ginkgo/core/preconditioner/ilu.hpp @@ -154,92 +154,43 @@ class Ilu : public EnableLinOp< [[deprecated("use with_l_solver instead")]] parameters_type& with_l_solver_factory( - std::shared_ptr factory) + deferred_factory_parameter solver) { - return with_l_solver(std::move(factory)); + return with_l_solver(std::move(solver)); } parameters_type& with_l_solver( - std::shared_ptr factory) + deferred_factory_parameter solver) { - this->l_solver_generator = - [factory](std::shared_ptr) - -> std::shared_ptr { - return factory; - }; - return *this; - } - - template ().on( - std::shared_ptr{}))> - parameters_type& with_l_solver(SolverParameters parameters) - { - this->l_solver_generator = - [parameters](std::shared_ptr exec) - -> std::shared_ptr { - return parameters.on(exec); - }; + this->l_solver_generator = std::move(solver); return *this; } [[deprecated("use with_u_solver 
instead")]] parameters_type& with_u_solver_factory( - std::shared_ptr factory) + deferred_factory_parameter solver) { - return with_u_solver(std::move(factory)); + return with_u_solver(std::move(solver)); } parameters_type& with_u_solver( - std::shared_ptr factory) - { - this->u_solver_generator = - [factory](std::shared_ptr) - -> std::shared_ptr { - return factory; - }; - return *this; - } - - template ().on( - std::shared_ptr{}))> - parameters_type& with_u_solver(SolverParameters parameters) + deferred_factory_parameter solver) { - this->u_solver_generator = - [parameters](std::shared_ptr exec) - -> std::shared_ptr { - return parameters.on(exec); - }; + this->u_solver_generator = std::move(solver); return *this; } [[deprecated("use with_factorization instead")]] parameters_type& - with_factorization_factory(std::shared_ptr factory) + with_factorization_factory( + deferred_factory_parameter factorization) { - return with_factorization(std::move(factory)); + return with_factorization(std::move(factorization)); } parameters_type& with_factorization( - std::shared_ptr factory) - { - this->factorization_generator = - [factory](std::shared_ptr) - -> std::shared_ptr { return factory; }; - return *this; - } - - template < - typename FactorizationParameters, - typename = decltype(std::declval().on( - std::shared_ptr{}))> - parameters_type& with_factorization(FactorizationParameters parameters) + deferred_factory_parameter factorization) { - this->factorization_generator = - [parameters](std::shared_ptr exec) - -> std::shared_ptr { - return parameters.on(exec); - }; + this->factorization_generator = std::move(factorization); return *this; } @@ -250,31 +201,27 @@ class Ilu : public EnableLinOp< { auto parameters_copy = *this; if (l_solver_generator) { - parameters_copy.l_solver_factory = l_solver_generator(exec); + parameters_copy.l_solver_factory = l_solver_generator.on(exec); } if (u_solver_generator) { - parameters_copy.u_solver_factory = u_solver_generator(exec); + parameters_copy.u_solver_factory = u_solver_generator.on(exec); } if (factorization_generator) { parameters_copy.factorization_factory = - factorization_generator(exec); + factorization_generator.on(exec); } return parameters_copy .enable_parameters_type::on(exec); } private: - std::function( - std::shared_ptr)> + deferred_factory_parameter l_solver_generator; - std::function( - std::shared_ptr)> + deferred_factory_parameter u_solver_generator; - std::function( - std::shared_ptr)> - factorization_generator; + deferred_factory_parameter factorization_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Ilu, parameters, Factory); diff --git a/include/ginkgo/core/solver/ir.hpp b/include/ginkgo/core/solver/ir.hpp index 792a0cdcfc6..d30fd9d69bc 100644 --- a/include/ginkgo/core/solver/ir.hpp +++ b/include/ginkgo/core/solver/ir.hpp @@ -208,23 +208,10 @@ class Ir : public EnableLinOp>, /** * */ - parameters_type& with_solver(std::shared_ptr solver) + parameters_type& with_solver( + deferred_factory_parameter solver) { - this->solver_generator = [solver](std::shared_ptr) - -> std::shared_ptr { return solver; }; - return *this; - } - - template ().on( - std::shared_ptr{}))> - parameters_type& with_solver(SolverParameters solver_parameters) - { - this->solver_generator = - [solver_parameters](std::shared_ptr exec) - -> std::shared_ptr { - return solver_parameters.on(exec); - }; + this->solver_generator = std::move(solver); return *this; } @@ -245,16 +232,14 @@ class Ir : public EnableLinOp>, { auto parameters_copy = *this; if (solver_generator) { - 
parameters_copy.solver = solver_generator(exec); + parameters_copy.solver = solver_generator.on(exec); } return parameters_copy.enable_iterative_solver_factory_parameters< parameters_type, Factory>::on(exec); } private: - std::function( - std::shared_ptr)> - solver_generator; + deferred_factory_parameter solver_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Ir, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/solver_base.hpp b/include/ginkgo/core/solver/solver_base.hpp index f527978c200..2ed7375ac76 100644 --- a/include/ginkgo/core/solver/solver_base.hpp +++ b/include/ginkgo/core/solver/solver_base.hpp @@ -880,7 +880,9 @@ struct enable_iterative_solver_factory_parameters template Parameters& with_criteria(Args... value) { - this->criterion_generators = {build_generator(std::move(value))...}; + this->criterion_generators = { + deferred_factory_parameter{ + std::move(value)}...}; return *self(); } @@ -895,7 +897,7 @@ struct enable_iterative_solver_factory_parameters auto copy = *self(); copy.criteria.clear(); for (auto& generator : criterion_generators) { - copy.criteria.push_back(generator(exec)); + copy.criteria.push_back(generator.on(exec)); } auto factory = copy.enable_parameters_type::on(exec); @@ -905,28 +907,7 @@ struct enable_iterative_solver_factory_parameters private: GKO_ENABLE_SELF(Parameters); - std::function( - std::shared_ptr)> - build_generator(std::shared_ptr criterion) - { - return - [criterion](std::shared_ptr) { return criterion; }; - } - - template ().on( - std::shared_ptr{}))> - std::function( - std::shared_ptr)> - build_generator(CriterionParameters criterion_parameters) - { - return [criterion_parameters](std::shared_ptr exec) { - return criterion_parameters.on(exec); - }; - } - - std::vector( - std::shared_ptr)>> + std::vector> criterion_generators; }; @@ -954,25 +935,9 @@ struct enable_preconditioned_iterative_solver_factory_parameters * */ Parameters& with_preconditioner( - std::shared_ptr preconditioner) - { - this->preconditioner_generator = - [preconditioner](std::shared_ptr) - -> std::shared_ptr { return preconditioner; }; - return *self(); - } - - template ().on( - std::shared_ptr{}))> - Parameters& with_preconditioner( - PreconditionerParameters preconditioner_parameters) + deferred_factory_parameter preconditioner) { - this->preconditioner_generator = - [preconditioner_parameters](std::shared_ptr exec) - -> std::shared_ptr { - return preconditioner_parameters.on(exec); - }; + this->preconditioner_generator = std::move(preconditioner); return *self(); } @@ -993,7 +958,7 @@ struct enable_preconditioned_iterative_solver_factory_parameters { auto parameters_copy = *self(); if (preconditioner_generator) { - parameters_copy.preconditioner = preconditioner_generator(exec); + parameters_copy.preconditioner = preconditioner_generator.on(exec); } return parameters_copy.enable_iterative_solver_factory_parameters< Parameters, Factory>::on(exec); @@ -1002,9 +967,7 @@ struct enable_preconditioned_iterative_solver_factory_parameters private: GKO_ENABLE_SELF(Parameters); - std::function( - std::shared_ptr)> - preconditioner_generator; + deferred_factory_parameter preconditioner_generator; }; From d6895fdc267edb92ead75b38e1cbbb9667bdb508 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 8 Aug 2023 16:43:05 +0200 Subject: [PATCH 309/583] handle Multigrid --- include/ginkgo/core/base/abstract_factory.hpp | 5 + include/ginkgo/core/solver/multigrid.hpp | 117 ++++++++++++++++-- include/ginkgo/core/solver/solver_base.hpp | 4 +- 3 
files changed, 113 insertions(+), 13 deletions(-) diff --git a/include/ginkgo/core/base/abstract_factory.hpp b/include/ginkgo/core/base/abstract_factory.hpp index 3609a3f1205..e8ec803b480 100644 --- a/include/ginkgo/core/base/abstract_factory.hpp +++ b/include/ginkgo/core/base/abstract_factory.hpp @@ -279,6 +279,8 @@ class deferred_factory_parameter { public: deferred_factory_parameter() = default; + deferred_factory_parameter(std::nullptr_t) {} + template on( std::shared_ptr exec) const { + if (!(*this)) { + GKO_NOT_SUPPORTED(*this); + } return generator_(exec); } diff --git a/include/ginkgo/core/solver/multigrid.hpp b/include/ginkgo/core/solver/multigrid.hpp index c78e54a773d..5aab788f71f 100644 --- a/include/ginkgo/core/solver/multigrid.hpp +++ b/include/ginkgo/core/solver/multigrid.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -224,8 +225,16 @@ class Multigrid : public EnableLinOp, /** * MultigridLevel Factory list */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(mg_level, nullptr); + std::vector> mg_level{nullptr}; + + template + parameters_type& with_mg_level(Args&&... level) + { + this->mg_level_generator = { + deferred_factory_parameter{ + std::forward(level)}...}; + return *this; + } /** * Custom selector size_type (size_type level, const LinOp* fine_matrix) @@ -256,6 +265,7 @@ class Multigrid : public EnableLinOp, std::function GKO_FACTORY_PARAMETER_SCALAR(level_selector, nullptr); + using smoother_list = std::vector>; /** * Pre-smooth Factory list. * Its size must be 0, 1 or be the same as mg_level's. @@ -270,17 +280,14 @@ class Multigrid : public EnableLinOp, * If any element in the vector is a `nullptr` then the smoother * application at the corresponding level is skipped. */ - using smoother_list = std::vector>; - smoother_list GKO_FACTORY_PARAMETER_VECTOR(pre_smoother, - smoother_list{}); + smoother_list pre_smoother{}; /** * Post-smooth Factory list. * It is similar to Pre-smooth Factory list. It is ignored if * the factory parameter post_uses_pre is set to true. */ - smoother_list GKO_FACTORY_PARAMETER_VECTOR(post_smoother, - smoother_list{}); + smoother_list post_smoother{}; /** * Mid-smooth Factory list. If it contains available elements, multigrid @@ -289,8 +296,34 @@ class Multigrid : public EnableLinOp, * Pre-smooth Factory list. It is ignored if the factory parameter * mid_case is not mid. */ - smoother_list GKO_FACTORY_PARAMETER_VECTOR(mid_smoother, - smoother_list{}); + smoother_list mid_smoother{}; + + template + parameters_type& with_pre_smoother(Args&&... smoother) + { + this->pre_smoother_generator = { + deferred_factory_parameter{ + std::forward(smoother)}...}; + return *this; + } + + template + parameters_type& with_post_smoother(Args&&... smoother) + { + this->post_smoother_generator = { + deferred_factory_parameter{ + std::forward(smoother)}...}; + return *this; + } + + template + parameters_type& with_mid_smoother(Args&&... smoother) + { + this->mid_smoother_generator = { + deferred_factory_parameter{ + std::forward(smoother)}...}; + return *this; + } /** * Whether post-smoothing-related calls use corresponding @@ -330,8 +363,17 @@ class Multigrid : public EnableLinOp, * If not set, then a direct LU solver will be used as solver on the * coarsest level. */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(coarsest_solver, nullptr); + std::vector> coarsest_solver{ + nullptr}; + + template + parameters_type& with_coarsest_solver(Args&&... 
solver) + { + this->coarsest_solver_generator = { + deferred_factory_parameter{ + std::forward(solver)}...}; + return *this; + } /** * Custom coarsest_solver selector @@ -403,6 +445,59 @@ class Multigrid : public EnableLinOp, */ initial_guess_mode GKO_FACTORY_PARAMETER_SCALAR( default_initial_guess, initial_guess_mode::zero); + + std::unique_ptr on(std::shared_ptr exec) const + { + auto copy = *this; + if (!copy.mg_level_generator.empty()) { + copy.mg_level.clear(); + for (auto& generator : copy.mg_level_generator) { + copy.mg_level.push_back(generator.on(exec)); + } + } + if (!copy.pre_smoother_generator.empty()) { + copy.pre_smoother.clear(); + for (auto& generator : copy.pre_smoother_generator) { + copy.pre_smoother.push_back(generator ? generator.on(exec) + : nullptr); + } + } + if (!copy.mid_smoother_generator.empty()) { + copy.mid_smoother.clear(); + for (auto& generator : copy.mid_smoother_generator) { + copy.mid_smoother.push_back(generator ? generator.on(exec) + : nullptr); + } + } + if (!copy.post_smoother_generator.empty()) { + copy.post_smoother.clear(); + for (auto& generator : copy.post_smoother_generator) { + copy.post_smoother.push_back(generator ? generator.on(exec) + : nullptr); + } + } + if (!copy.coarsest_solver_generator.empty()) { + copy.coarsest_solver.clear(); + for (auto& generator : copy.coarsest_solver_generator) { + copy.coarsest_solver.push_back( + generator ? generator.on(exec) : nullptr); + } + } + return copy.enable_iterative_solver_factory_parameters< + parameters_type, Factory>::on(exec); + } + + private: + std::vector> + mg_level_generator; + std::vector> + pre_smoother_generator; + std::vector> + mid_smoother_generator; + std::vector> + post_smoother_generator; + std::vector> + coarsest_solver_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Multigrid, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/solver_base.hpp b/include/ginkgo/core/solver/solver_base.hpp index 2ed7375ac76..f9132426c61 100644 --- a/include/ginkgo/core/solver/solver_base.hpp +++ b/include/ginkgo/core/solver/solver_base.hpp @@ -878,11 +878,11 @@ struct enable_iterative_solver_factory_parameters * used by the iterative solver in a fluent interface. */ template - Parameters& with_criteria(Args... value) + Parameters& with_criteria(Args&&... value) { this->criterion_generators = { deferred_factory_parameter{ - std::move(value)}...}; + std::forward(value)}...}; return *self(); } From 05fad8e878439e8cb8f68d11f4715d36e75e4160 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 10 May 2023 19:46:12 +0200 Subject: [PATCH 310/583] remove unnecessary .on(...) 
calls --- benchmark/solver/solver_common.hpp | 9 +-- benchmark/utils/overhead_linop.hpp | 27 ++------ benchmark/utils/preconditioners.hpp | 45 +++++++------- core/distributed/preconditioner/schwarz.cpp | 4 +- core/preconditioner/isai.cpp | 10 ++- core/solver/multigrid.cpp | 14 ++--- core/test/log/convergence.cpp | 3 +- core/test/log/profiler_hook.cpp | 3 +- core/test/log/record.cpp | 9 +-- core/test/log/stream.cpp | 9 +-- .../distributed/preconditioner/schwarz.cpp | 14 ++--- core/test/preconditioner/ic.cpp | 7 +-- core/test/preconditioner/ilu.cpp | 12 ++-- core/test/solver/bicg.cpp | 5 +- core/test/solver/bicgstab.cpp | 5 +- core/test/solver/cb_gmres.cpp | 11 ++-- core/test/solver/cg.cpp | 5 +- core/test/solver/cgs.cpp | 5 +- core/test/solver/fcg.cpp | 5 +- core/test/solver/gcr.cpp | 20 +++--- core/test/solver/gmres.cpp | 11 ++-- core/test/solver/idr.cpp | 5 +- core/test/solver/ir.cpp | 14 ++--- core/test/solver/multigrid.cpp | 32 +++++----- .../adaptiveprecision-blockjacobi.cpp | 16 ++--- examples/cb-gmres/cb-gmres.cpp | 20 +++--- examples/custom-logger/custom-logger.cpp | 8 +-- .../custom-matrix-format.cpp | 10 ++- .../custom-stopping-criterion.cpp | 13 ++-- .../distributed-solver/distributed-solver.cpp | 22 +++---- .../external-lib-interfacing.cpp | 8 +-- examples/ginkgo-overhead/ginkgo-overhead.cpp | 3 +- examples/heat-equation/heat-equation.cpp | 5 +- .../ilu-preconditioned-solver.cpp | 8 +-- .../inverse-iteration/inverse-iteration.cpp | 8 +-- .../ir-ilu-preconditioned-solver.cpp | 12 ++-- .../iterative-refinement.cpp | 16 ++--- examples/kokkos_assembly/kokkos_assembly.cpp | 12 ++-- .../minimal-cuda-solver.cpp | 8 +-- .../mixed-multigrid-preconditioned-solver.cpp | 18 ++---- .../mixed-multigrid-solver.cpp | 20 +++--- .../mixed-precision-ir/mixed-precision-ir.cpp | 10 ++- ...igrid-preconditioned-solver-customized.cpp | 6 +- .../multigrid-preconditioned-solver.cpp | 15 ++--- .../nine-pt-stencil-solver.cpp | 10 ++- examples/papi-logging/papi-logging.cpp | 8 +-- .../performance-debugging.cpp | 3 +- examples/poisson-solver/poisson-solver.cpp | 12 ++-- .../preconditioned-solver.cpp | 10 ++- .../preconditioner-export.cpp | 36 +++++------ .../simple-solver-logging.cpp | 5 +- examples/simple-solver/simple-solver.cpp | 8 +-- .../three-pt-stencil-solver.cpp | 10 ++- .../distributed/preconditioner/schwarz.hpp | 22 ++++++- reference/test/preconditioner/ilu.cpp | 48 ++++++-------- .../test/preconditioner/isai_kernels.cpp | 9 +-- reference/test/reorder/scaled_reordered.cpp | 6 +- reference/test/solver/bicg_kernels.cpp | 31 ++++------ reference/test/solver/bicgstab_kernels.cpp | 31 ++++------ reference/test/solver/cb_gmres_kernels.cpp | 17 ++--- reference/test/solver/cg_kernels.cpp | 32 ++++------ reference/test/solver/cgs_kernels.cpp | 27 +++----- reference/test/solver/direct.cpp | 3 +- reference/test/solver/fcg_kernels.cpp | 31 ++++------ reference/test/solver/gcr_kernels.cpp | 33 ++++------ reference/test/solver/gmres_kernels.cpp | 22 +++---- reference/test/solver/idr_kernels.cpp | 32 ++++------ reference/test/solver/ir_kernels.cpp | 15 ++--- reference/test/solver/multigrid_kernels.cpp | 42 +++++-------- test/mpi/preconditioner/schwarz.cpp | 16 +++-- test/mpi/solver/solver.cpp | 20 ++---- test/solver/bicg_kernels.cpp | 32 ++++------ test/solver/bicgstab_kernels.cpp | 10 ++- test/solver/cg_kernels.cpp | 16 ++--- test/solver/cgs_kernels.cpp | 10 ++- test/solver/direct.cpp | 24 +++---- test/solver/fcg_kernels.cpp | 16 ++--- test/solver/gcr_kernels.cpp | 10 ++- test/solver/gmres_kernels.cpp | 10 ++- 
test/solver/idr_kernels.cpp | 18 ++---- test/solver/ir_kernels.cpp | 62 ++++++------------- test/solver/solver.cpp | 25 +++----- test/test_install/test_install.cpp | 19 +++--- 83 files changed, 517 insertions(+), 796 deletions(-) diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index 0248ab8e757..784b70eca61 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -239,21 +239,18 @@ std::unique_ptr generate_solver( return gko::experimental::solver::Direct::build() .with_factorization( gko::experimental::factorization::Cholesky::build() - .on(exec)) + itype>::build()) .on(exec); } else if (description == "symm_direct") { return gko::experimental::solver::Direct::build() .with_factorization( gko::experimental::factorization::Lu::build() - .with_symmetric_sparsity(true) - .on(exec)) + .with_symmetric_sparsity(true)) .on(exec); } else if (description == "direct") { return gko::experimental::solver::Direct::build() .with_factorization( - gko::experimental::factorization::Lu::build().on( - exec)) + gko::experimental::factorization::Lu::build()) .on(exec); } else if (description == "overhead") { return add_criteria_precond_finalize>( diff --git a/benchmark/utils/overhead_linop.hpp b/benchmark/utils/overhead_linop.hpp index 168e650234d..d947b8de38e 100644 --- a/benchmark/utils/overhead_linop.hpp +++ b/benchmark/utils/overhead_linop.hpp @@ -104,27 +104,12 @@ class Overhead : public EnableLinOp>, friend class EnablePolymorphicObject; public: - GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) - { - /** - * Criterion factories. - */ - std::vector> - GKO_FACTORY_PARAMETER_VECTOR(criteria, nullptr); - - /** - * Preconditioner factory. - */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - preconditioner, nullptr); - - /** - * Already generated preconditioner. If one is provided, the factory - * `preconditioner` will be ignored. 
- */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - generated_preconditioner, nullptr); - }; + class Factory; + + struct parameters_type + : public gko::solver:: + enable_preconditioned_iterative_solver_factory_parameters< + parameters_type, Factory> {}; GKO_ENABLE_LIN_OP_FACTORY(Overhead, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/benchmark/utils/preconditioners.hpp b/benchmark/utils/preconditioners.hpp index 466d5f2d3f9..3450eb71b44 100644 --- a/benchmark/utils/preconditioners.hpp +++ b/benchmark/utils/preconditioners.hpp @@ -122,7 +122,7 @@ const std::map( .on(exec)); return gko::preconditioner::Ic, itype>::build() - .with_factorization_factory(fact) + .with_factorization(fact) .on(exec); }}, {"parict", @@ -137,7 +137,7 @@ const std::map( return gko::preconditioner:: Ilu, gko::solver::UpperTrs, false, itype>::build() - .with_factorization_factory(fact) + .with_factorization(fact) .on(exec); }}, {"parilu", @@ -150,7 +150,7 @@ const std::map( return gko::preconditioner:: Ilu, gko::solver::UpperTrs, false, itype>::build() - .with_factorization_factory(fact) + .with_factorization(fact) .on(exec); }}, {"parilut", @@ -165,7 +165,7 @@ const std::map( return gko::preconditioner:: Ilu, gko::solver::UpperTrs, false, itype>::build() - .with_factorization_factory(fact) + .with_factorization(fact) .on(exec); }}, {"ic", @@ -174,7 +174,7 @@ const std::map( gko::factorization::Ic::build().on(exec)); return gko::preconditioner::Ic, itype>::build() - .with_factorization_factory(fact) + .with_factorization(fact) .on(exec); }}, {"ilu", @@ -184,7 +184,7 @@ const std::map( return gko::preconditioner:: Ilu, gko::solver::UpperTrs, false, itype>::build() - .with_factorization_factory(fact) + .with_factorization(fact) .on(exec); }}, {"paric-isai", @@ -201,8 +201,8 @@ const std::map( return gko::preconditioner::Ic< gko::preconditioner::LowerIsai, itype>::build() - .with_factorization_factory(fact) - .with_l_solver_factory(lisai) + .with_factorization(fact) + .with_l_solver(lisai) .on(exec); }}, {"parict-isai", @@ -221,8 +221,8 @@ const std::map( return gko::preconditioner::Ic< gko::preconditioner::LowerIsai, itype>::build() - .with_factorization_factory(fact) - .with_l_solver_factory(lisai) + .with_factorization(fact) + .with_l_solver(lisai) .on(exec); }}, {"parilu-isai", @@ -244,9 +244,9 @@ const std::map( gko::preconditioner::LowerIsai, gko::preconditioner::UpperIsai, false, itype>::build() - .with_factorization_factory(fact) - .with_l_solver_factory(lisai) - .with_u_solver_factory(uisai) + .with_factorization(fact) + .with_l_solver(lisai) + .with_u_solver(uisai) .on(exec); }}, {"parilut-isai", @@ -270,9 +270,9 @@ const std::map( gko::preconditioner::LowerIsai, gko::preconditioner::UpperIsai, false, itype>::build() - .with_factorization_factory(fact) - .with_l_solver_factory(lisai) - .with_u_solver_factory(uisai) + .with_factorization(fact) + .with_l_solver(lisai) + .with_u_solver(uisai) .on(exec); }}, {"ic-isai", @@ -286,8 +286,8 @@ const std::map( return gko::preconditioner::Ic< gko::preconditioner::LowerIsai, itype>::build() - .with_factorization_factory(fact) - .with_l_solver_factory(lisai) + .with_factorization(fact) + .with_l_solver(lisai) .on(exec); }}, {"ilu-isai", @@ -306,9 +306,9 @@ const std::map( gko::preconditioner::LowerIsai, gko::preconditioner::UpperIsai, false, itype>::build() - .with_factorization_factory(fact) - .with_l_solver_factory(lisai) - .with_u_solver_factory(uisai) + .with_factorization(fact) + .with_l_solver(lisai) + .with_u_solver(uisai) .on(exec); }}, 
{"general-isai", @@ -326,8 +326,7 @@ const std::map( {"overhead", [](std::shared_ptr exec) { return gko::Overhead::build() .with_criteria(gko::stop::ResidualNorm::build() - .with_reduction_factor(rc_etype{}) - .on(exec)) + .with_reduction_factor(rc_etype{})) .on(exec); }}}; diff --git a/core/distributed/preconditioner/schwarz.cpp b/core/distributed/preconditioner/schwarz.cpp index 31c57947704..0d1267bc0b4 100644 --- a/core/distributed/preconditioner/schwarz.cpp +++ b/core/distributed/preconditioner/schwarz.cpp @@ -102,8 +102,8 @@ template void Schwarz::generate( std::shared_ptr system_matrix) { - if (parameters_.local_solver_factory) { - this->local_solver_ = parameters_.local_solver_factory->generate( + if (parameters_.local_solver) { + this->local_solver_ = parameters_.local_solver->generate( as>( system_matrix) diff --git a/core/preconditioner/isai.cpp b/core/preconditioner/isai.cpp index 52fa9140853..4e0e2ea95d8 100644 --- a/core/preconditioner/isai.cpp +++ b/core/preconditioner/isai.cpp @@ -230,17 +230,15 @@ void Isai::generate_inverse( excess_solver_factory = Gmres::build() .with_preconditioner( - Bj::build().with_max_block_size(32u).on(exec)) + Bj::build().with_max_block_size(32u)) .with_criteria( - gko::stop::Iteration::build() - .with_max_iters(excess_dim) - .on(exec), + gko::stop::Iteration::build().with_max_iters( + excess_dim), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::rhs_norm) .with_reduction_factor( remove_complex{ - excess_solver_reduction}) - .on(exec)) + excess_solver_reduction})) .on(exec); excess_solution->copy_from(excess_rhs); } else if (is_lower) { diff --git a/core/solver/multigrid.cpp b/core/solver/multigrid.cpp index 303106fa4f6..84afc1666cc 100644 --- a/core/solver/multigrid.cpp +++ b/core/solver/multigrid.cpp @@ -569,21 +569,18 @@ void Multigrid::generate() using absolute_value_type = remove_complex; return solver::Gmres::build() .with_criteria( - stop::Iteration::build() - .with_max_iters(matrix->get_size()[0]) - .on(exec), + stop::Iteration::build().with_max_iters( + matrix->get_size()[0]), stop::ResidualNorm::build() .with_reduction_factor( std::numeric_limits< absolute_value_type>::epsilon() * - absolute_value_type{10}) - .on(exec)) + absolute_value_type{10})) .with_krylov_dim( std::min(size_type(100), matrix->get_size()[0])) .with_preconditioner( preconditioner::Jacobi::build() - .with_max_block_size(1u) - .on(exec)) + .with_max_block_size(1u)) .on(exec) ->generate(matrix); } else { @@ -591,8 +588,7 @@ void Multigrid::generate() int32>::build() .with_factorization( experimental::factorization::Lu::build() - .on(exec)) + int32>::build()) .on(exec) ->generate(matrix); } diff --git a/core/test/log/convergence.cpp b/core/test/log/convergence.cpp index f6294d08cd4..746e8603865 100644 --- a/core/test/log/convergence.cpp +++ b/core/test/log/convergence.cpp @@ -68,8 +68,7 @@ class Convergence : public ::testing::Test { gko::initialize({6}, exec); std::unique_ptr system = gko::solver::Ir::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec) ->generate(gko::initialize(I>{{1, 2}, {0, 3}}, exec)); std::unique_ptr rhs = gko::initialize({15, 25}, exec); diff --git a/core/test/log/profiler_hook.cpp b/core/test/log/profiler_hook.cpp index 281eed2d70b..cd6e1b0a3ce 100644 --- a/core/test/log/profiler_hook.cpp +++ b/core/test/log/profiler_hook.cpp @@ -202,8 +202,7 @@ TEST(ProfilerHook, LogsIteration) auto alpha = 
gko::share(gko::initialize({1.0}, exec)); auto solver = gko::solver::Ir<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec) ->generate(mtx); logger->set_object_name(solver, "solver"); diff --git a/core/test/log/record.cpp b/core/test/log/record.cpp index 0aeca2b3df7..098f93ad523 100644 --- a/core/test/log/record.cpp +++ b/core/test/log/record.cpp @@ -440,8 +440,7 @@ TEST(Record, CatchesLinopFactoryGenerateStarted) gko::log::Logger::linop_factory_generate_started_mask); auto factory = gko::solver::Bicgstab<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(exec); auto input = factory->generate(gko::matrix::Dense<>::create(exec)); @@ -462,8 +461,7 @@ TEST(Record, CatchesLinopFactoryGenerateCompleted) gko::log::Logger::linop_factory_generate_completed_mask); auto factory = gko::solver::Bicgstab<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(exec); auto input = factory->generate(gko::matrix::Dense<>::create(exec)); auto output = factory->generate(gko::matrix::Dense<>::create(exec)); @@ -569,8 +567,7 @@ TEST(Record, CatchesIterations) gko::log::Record::create(gko::log::Logger::iteration_complete_mask); auto factory = gko::solver::Bicgstab<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(exec); auto solver = factory->generate(gko::initialize({1.1}, exec)); auto right_hand_side = gko::initialize({-5.5}, exec); diff --git a/core/test/log/stream.cpp b/core/test/log/stream.cpp index 3558a7d5564..721273ca468 100644 --- a/core/test/log/stream.cpp +++ b/core/test/log/stream.cpp @@ -606,8 +606,7 @@ TYPED_TEST(Stream, CatchesLinopFactoryGenerateStarted) gko::log::Logger::linop_factory_generate_started_mask, out); auto factory = gko::solver::Bicgstab::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(exec); auto input = factory->generate(gko::matrix::Dense::create(exec)); std::stringstream ptrstream_factory; @@ -633,8 +632,7 @@ TYPED_TEST(Stream, CatchesLinopFactoryGenerateCompleted) gko::log::Logger::linop_factory_generate_completed_mask, out); auto factory = gko::solver::Bicgstab::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(exec); auto input = factory->generate(gko::matrix::Dense::create(exec)); auto output = @@ -815,8 +813,7 @@ TYPED_TEST(Stream, CatchesIterationsWithVerbose) auto factory = gko::solver::Bicgstab::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(exec); auto solver = factory->generate(gko::initialize({1.1}, exec)); auto right_hand_side = gko::initialize({-5.5}, exec); diff --git a/core/test/mpi/distributed/preconditioner/schwarz.cpp b/core/test/mpi/distributed/preconditioner/schwarz.cpp index ff1cd0d45e5..e0b5749e987 100644 --- a/core/test/mpi/distributed/preconditioner/schwarz.cpp +++ b/core/test/mpi/distributed/preconditioner/schwarz.cpp @@ -67,7 +67,7 @@ class SchwarzFactory : public ::testing::Test { 
mtx(Mtx::create(exec, MPI_COMM_WORLD)) { schwarz = Schwarz::build() - .with_local_solver_factory(jacobi_factory) + .with_local_solver(jacobi_factory) .on(exec) ->generate(mtx); } @@ -83,8 +83,8 @@ class SchwarzFactory : public ::testing::Test { gko::ptr_param b) { ASSERT_EQ(a->get_size(), b->get_size()); - ASSERT_EQ(a->get_parameters().local_solver_factory, - b->get_parameters().local_solver_factory); + ASSERT_EQ(a->get_parameters().local_solver, + b->get_parameters().local_solver); } std::shared_ptr exec; @@ -105,7 +105,7 @@ TYPED_TEST(SchwarzFactory, KnowsItsExecutor) TYPED_TEST(SchwarzFactory, CanSetLocalFactory) { - ASSERT_EQ(this->schwarz->get_parameters().local_solver_factory, + ASSERT_EQ(this->schwarz->get_parameters().local_solver, this->jacobi_factory); } @@ -125,7 +125,7 @@ TYPED_TEST(SchwarzFactory, CanBeCopied) using Mtx = typename TestFixture::Mtx; auto bj = gko::share(Jacobi::build().on(this->exec)); auto copy = Schwarz::build() - .with_local_solver_factory(bj) + .with_local_solver(bj) .on(this->exec) ->generate(Mtx::create(this->exec, MPI_COMM_WORLD)); @@ -143,7 +143,7 @@ TYPED_TEST(SchwarzFactory, CanBeMoved) auto tmp = clone(this->schwarz); auto bj = gko::share(Jacobi::build().on(this->exec)); auto copy = Schwarz::build() - .with_local_solver_factory(bj) + .with_local_solver(bj) .on(this->exec) ->generate(Mtx::create(this->exec, MPI_COMM_WORLD)); @@ -158,7 +158,7 @@ TYPED_TEST(SchwarzFactory, CanBeCleared) this->schwarz->clear(); ASSERT_EQ(this->schwarz->get_size(), gko::dim<2>(0, 0)); - ASSERT_EQ(this->schwarz->get_parameters().local_solver_factory, nullptr); + ASSERT_EQ(this->schwarz->get_parameters().local_solver, nullptr); } diff --git a/core/test/preconditioner/ic.cpp b/core/test/preconditioner/ic.cpp index efd54ee9ebc..9e1e3f3e3c4 100644 --- a/core/test/preconditioner/ic.cpp +++ b/core/test/preconditioner/ic.cpp @@ -77,9 +77,8 @@ TEST_F(IcFactory, KnowsItsExecutor) TEST_F(IcFactory, CanSetLSolverFactory) { - auto ic_factory = ic_prec_type::build() - .with_l_solver_factory(this->l_factory) - .on(this->exec); + auto ic_factory = + ic_prec_type::build().with_l_solver(this->l_factory).on(this->exec); ASSERT_EQ(ic_factory->get_parameters().l_solver_factory, this->l_factory); } @@ -88,7 +87,7 @@ TEST_F(IcFactory, CanSetLSolverFactory) TEST_F(IcFactory, CanSetFactorizationFactory) { auto ic_factory = ic_prec_type::build() - .with_factorization_factory(this->fact_factory) + .with_factorization(this->fact_factory) .on(this->exec); ASSERT_EQ(ic_factory->get_parameters().factorization_factory, diff --git a/core/test/preconditioner/ilu.cpp b/core/test/preconditioner/ilu.cpp index c7b72e09b09..f25a20b47e3 100644 --- a/core/test/preconditioner/ilu.cpp +++ b/core/test/preconditioner/ilu.cpp @@ -81,9 +81,8 @@ TEST_F(IluFactory, KnowsItsExecutor) TEST_F(IluFactory, CanSetLSolverFactory) { - auto ilu_factory = ilu_prec_type::build() - .with_l_solver_factory(this->l_factory) - .on(this->exec); + auto ilu_factory = + ilu_prec_type::build().with_l_solver(this->l_factory).on(this->exec); ASSERT_EQ(ilu_factory->get_parameters().l_solver_factory, this->l_factory); } @@ -91,9 +90,8 @@ TEST_F(IluFactory, CanSetLSolverFactory) TEST_F(IluFactory, CanSetUSolverFactory) { - auto ilu_factory = ilu_prec_type::build() - .with_u_solver_factory(this->u_factory) - .on(this->exec); + auto ilu_factory = + ilu_prec_type::build().with_u_solver(this->u_factory).on(this->exec); ASSERT_EQ(ilu_factory->get_parameters().u_solver_factory, this->u_factory); } @@ -102,7 +100,7 @@ TEST_F(IluFactory, 
CanSetUSolverFactory) TEST_F(IluFactory, CanSetFactorizationFactory) { auto ilu_factory = ilu_prec_type::build() - .with_factorization_factory(this->fact_factory) + .with_factorization(this->fact_factory) .on(this->exec); ASSERT_EQ(ilu_factory->get_parameters().factorization_factory, diff --git a/core/test/solver/bicg.cpp b/core/test/solver/bicg.cpp index 4c7421f63e1..37ed110bdf4 100644 --- a/core/test/solver/bicg.cpp +++ b/core/test/solver/bicg.cpp @@ -66,10 +66,9 @@ class Bicg : public ::testing::Test { bicg_factory( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() - .with_reduction_factor(gko::remove_complex{1e-6}) - .on(exec)) + .with_reduction_factor(gko::remove_complex{1e-6})) .on(exec)), solver(bicg_factory->generate(mtx)) {} diff --git a/core/test/solver/bicgstab.cpp b/core/test/solver/bicgstab.cpp index c42cd7db2af..937064da7c4 100644 --- a/core/test/solver/bicgstab.cpp +++ b/core/test/solver/bicgstab.cpp @@ -64,10 +64,9 @@ class Bicgstab : public ::testing::Test { bicgstab_factory( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() - .with_reduction_factor(gko::remove_complex{1e-6}) - .on(exec)) + .with_reduction_factor(gko::remove_complex{1e-6})) .on(exec)), solver(bicgstab_factory->generate(mtx)) {} diff --git a/core/test/solver/cb_gmres.cpp b/core/test/solver/cb_gmres.cpp index b81d84f8b1e..17dcf0c385f 100644 --- a/core/test/solver/cb_gmres.cpp +++ b/core/test/solver/cb_gmres.cpp @@ -72,23 +72,20 @@ class CbGmres : public ::testing::Test { Solver::build() .with_storage_precision(storage_precision) .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(nc_value_type{1e-6}) - .on(exec)) + .with_reduction_factor(nc_value_type{1e-6})) .on(exec)), solver(cb_gmres_factory->generate(mtx)), cb_gmres_big_factory( Solver::build() .with_storage_precision(storage_precision) .with_criteria( - gko::stop::Iteration::build().with_max_iters(128u).on( - exec), + gko::stop::Iteration::build().with_max_iters(128u), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(nc_value_type{1e-6}) - .on(exec)) + .with_reduction_factor(nc_value_type{1e-6})) .on(exec)), big_solver(cb_gmres_big_factory->generate(mtx)) {} diff --git a/core/test/solver/cg.cpp b/core/test/solver/cg.cpp index 5daf43bc160..d0381a6e5ab 100644 --- a/core/test/solver/cg.cpp +++ b/core/test/solver/cg.cpp @@ -66,10 +66,9 @@ class Cg : public ::testing::Test { cg_factory( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() - .with_reduction_factor(gko::remove_complex{1e-6}) - .on(exec)) + .with_reduction_factor(gko::remove_complex{1e-6})) .on(exec)), solver(cg_factory->generate(mtx)) {} diff --git a/core/test/solver/cgs.cpp b/core/test/solver/cgs.cpp index c23dc7b2e3b..7509c22d76e 100644 --- a/core/test/solver/cgs.cpp +++ b/core/test/solver/cgs.cpp @@ -66,10 +66,9 @@ class Cgs : public ::testing::Test { cgs_factory( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), + 
gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() - .with_reduction_factor(gko::remove_complex{1e-6}) - .on(exec)) + .with_reduction_factor(gko::remove_complex{1e-6})) .on(exec)), solver(cgs_factory->generate(mtx)) {} diff --git a/core/test/solver/fcg.cpp b/core/test/solver/fcg.cpp index 59bb5e0cdee..21cc686bd01 100644 --- a/core/test/solver/fcg.cpp +++ b/core/test/solver/fcg.cpp @@ -63,10 +63,9 @@ class Fcg : public ::testing::Test { fcg_factory( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() - .with_reduction_factor(gko::remove_complex{1e-6}) - .on(exec)) + .with_reduction_factor(gko::remove_complex{1e-6})) .on(exec)), solver(fcg_factory->generate(mtx)) {} diff --git a/core/test/solver/gcr.cpp b/core/test/solver/gcr.cpp index f7ba80ebba1..fec313582ed 100644 --- a/core/test/solver/gcr.cpp +++ b/core/test/solver/gcr.cpp @@ -67,23 +67,19 @@ class Gcr : public ::testing::Test { : exec(gko::ReferenceExecutor::create()), mtx(gko::initialize( {{1.0, 2.0, 3.0}, {3.0, 2.0, -1.0}, {0.0, -1.0, 2}}, exec)), - gcr_factory( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) - .on(exec)), + gcr_factory(Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) + .on(exec)), solver(gcr_factory->generate(mtx)), gcr_big_factory( Big_solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(128u).on( - exec), + gko::stop::Iteration::build().with_max_iters(128u), gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_reduction_factor(reduction_factor)) .on(exec)), big_solver(gcr_big_factory->generate(mtx)) {} diff --git a/core/test/solver/gmres.cpp b/core/test/solver/gmres.cpp index 11cafe2c86f..8ce8135f8b2 100644 --- a/core/test/solver/gmres.cpp +++ b/core/test/solver/gmres.cpp @@ -70,20 +70,17 @@ class Gmres : public ::testing::Test { gmres_factory( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_reduction_factor(reduction_factor)) .on(exec)), solver(gmres_factory->generate(mtx)), gmres_big_factory( Big_solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(128u).on( - exec), + gko::stop::Iteration::build().with_max_iters(128u), gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_reduction_factor(reduction_factor)) .on(exec)), big_solver(gmres_big_factory->generate(mtx)) {} diff --git a/core/test/solver/idr.cpp b/core/test/solver/idr.cpp index 45511be8e1b..e2657be8581 100644 --- a/core/test/solver/idr.cpp +++ b/core/test/solver/idr.cpp @@ -64,10 +64,9 @@ class Idr : public ::testing::Test { idr_factory( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() - .with_reduction_factor(gko::remove_complex{1e-6}) - .on(exec)) + .with_reduction_factor(gko::remove_complex{1e-6})) .on(exec)), solver(idr_factory->generate(mtx)) {} diff --git a/core/test/solver/ir.cpp 
b/core/test/solver/ir.cpp index 5fdcd55af14..7419f99bfd0 100644 --- a/core/test/solver/ir.cpp +++ b/core/test/solver/ir.cpp @@ -64,14 +64,12 @@ class Ir : public ::testing::Test { : exec(gko::ReferenceExecutor::create()), mtx(gko::initialize( {{2, -1.0, 0.0}, {-1.0, 2, -1.0}, {0.0, -1.0, 2}}, exec)), - ir_factory( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) - .on(exec)), + ir_factory(Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(3u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .on(exec)), solver(ir_factory->generate(mtx)) {} diff --git a/core/test/solver/multigrid.cpp b/core/test/solver/multigrid.cpp index 856f9651ebe..8fea85a40bb 100644 --- a/core/test/solver/multigrid.cpp +++ b/core/test/solver/multigrid.cpp @@ -153,11 +153,10 @@ class Multigrid : public ::testing::Test { multigrid_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(gko::remove_complex{1e-6}) - .on(exec)) + .with_reduction_factor(gko::remove_complex{1e-6})) .with_max_levels(2u) .with_coarsest_solver(lo_factory) .with_pre_smoother(lo_factory) @@ -427,28 +426,25 @@ TYPED_TEST(Multigrid, ThrowWhenNullMgLevel) TYPED_TEST(Multigrid, ThrowWhenMgLevelContainsNullptr) { using Solver = typename TestFixture::Solver; - auto factory = Solver::build() - .with_max_levels(1u) - .with_min_coarse_rows(2u) - .with_criteria(this->criterion) - .with_mg_level(this->rp_factory, nullptr) - .on(this->exec); + auto factory_parameters = Solver::build() + .with_max_levels(1u) + .with_min_coarse_rows(2u) + .with_criteria(this->criterion) + .with_mg_level(this->rp_factory, nullptr); - ASSERT_THROW(factory->generate(this->mtx), gko::NotSupported); + ASSERT_THROW(factory_parameters.on(this->exec), gko::NotSupported); } TYPED_TEST(Multigrid, ThrowWhenEmptyMgLevelList) { using Solver = typename TestFixture::Solver; - auto factory = - Solver::build() - .with_max_levels(1u) - .with_min_coarse_rows(2u) - .with_mg_level( - std::vector>{}) - .with_criteria(this->criterion) - .on(this->exec); + auto factory = Solver::build() + .with_max_levels(1u) + .with_min_coarse_rows(2u) + .with_mg_level() + .with_criteria(this->criterion) + .on(this->exec); ASSERT_THROW(factory->generate(this->mtx), gko::NotSupported); } diff --git a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp index 79b197aacc8..b673024c6fe 100644 --- a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp +++ b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp @@ -110,18 +110,14 @@ int main(int argc, char* argv[]) const RealValueType reduction_factor = 1e-7; auto solver_gen = cg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(10000u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(10000u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) // Add preconditioner, these 2 lines are the only // difference from the simple solver example - .with_preconditioner(bj::build() - 
.with_max_block_size(16u) - .with_storage_optimization( - gko::precision_reduction::autodetect()) - .on(exec)) + .with_preconditioner( + bj::build().with_max_block_size(16u).with_storage_optimization( + gko::precision_reduction::autodetect())) .on(exec); // Create solver std::shared_ptr> logger = diff --git a/examples/cb-gmres/cb-gmres.cpp b/examples/cb-gmres/cb-gmres.cpp index b096e48c71a..915035fd642 100644 --- a/examples/cb-gmres/cb-gmres.cpp +++ b/examples/cb-gmres/cb-gmres.cpp @@ -154,12 +154,10 @@ int main(int argc, char* argv[]) // storage type auto solver_gen_keep = cb_gmres::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1000u).on(exec), - gko::stop::ResidualNorm::build() - .with_baseline(gko::stop::mode::rhs_norm) - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1000u), + gko::stop::ResidualNorm::build() + .with_baseline(gko::stop::mode::rhs_norm) + .with_reduction_factor(reduction_factor)) .with_krylov_dim(100u) .with_storage_precision( gko::solver::cb_gmres::storage_precision::keep) @@ -167,12 +165,10 @@ int main(int argc, char* argv[]) auto solver_gen_reduce = cb_gmres::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1000u).on(exec), - gko::stop::ResidualNorm::build() - .with_baseline(gko::stop::mode::rhs_norm) - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1000u), + gko::stop::ResidualNorm::build() + .with_baseline(gko::stop::mode::rhs_norm) + .with_reduction_factor(reduction_factor)) .with_krylov_dim(100u) .with_storage_precision( gko::solver::cb_gmres::storage_precision::reduce1) diff --git a/examples/custom-logger/custom-logger.cpp b/examples/custom-logger/custom-logger.cpp index 7e6cf531edd..e44303b81a2 100644 --- a/examples/custom-logger/custom-logger.cpp +++ b/examples/custom-logger/custom-logger.cpp @@ -290,11 +290,9 @@ int main(int argc, char* argv[]) // object needs to be built on. auto solver_gen = cg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(20u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(20u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) .on(exec); // Instantiate a ResidualLogger logger. 
diff --git a/examples/custom-matrix-format/custom-matrix-format.cpp b/examples/custom-matrix-format/custom-matrix-format.cpp index 4610413fe9c..bcaa126cdaa 100644 --- a/examples/custom-matrix-format/custom-matrix-format.cpp +++ b/examples/custom-matrix-format/custom-matrix-format.cpp @@ -291,12 +291,10 @@ int main(int argc, char* argv[]) const RealValueType reduction_factor{1e-7}; // Generate solver and solve the system cg::build() - .with_criteria(gko::stop::Iteration::build() - .with_max_iters(discretization_points) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(discretization_points), + gko::stop::ResidualNorm::build().with_reduction_factor( + reduction_factor)) .on(exec) // notice how our custom StencilMatrix can be used in the same way as // any built-in type diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp index 800846cfbd9..e4c7d88785c 100644 --- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp +++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp @@ -109,13 +109,12 @@ void run_solver(volatile bool* stop_iteration_process, auto x = gko::read(std::ifstream("data/x0.mtx"), exec); // Create solver factory and solve system - auto solver = bicg::build() - .with_criteria(ByInteraction::build() - .with_stop_iteration_process( - stop_iteration_process) - .on(exec)) - .on(exec) - ->generate(A); + auto solver = + bicg::build() + .with_criteria(ByInteraction::build().with_stop_iteration_process( + stop_iteration_process)) + .on(exec) + ->generate(A); solver->add_logger(gko::log::Stream::create( gko::log::Logger::iteration_complete_mask, std::cout, true)); solver->apply(b, x); diff --git a/examples/distributed-solver/distributed-solver.cpp b/examples/distributed-solver/distributed-solver.cpp index 123f93775f5..1b758d186a4 100644 --- a/examples/distributed-solver/distributed-solver.cpp +++ b/examples/distributed-solver/distributed-solver.cpp @@ -222,19 +222,15 @@ int main(int argc, char* argv[]) const gko::remove_complex reduction_factor{1e-8}; std::shared_ptr> logger = gko::log::Convergence::create(); - auto Ainv = - solver::build() - .with_preconditioner(schwarz::build() - .with_local_solver_factory(local_solver) - .on(exec)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(num_iters).on( - exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) - .on(exec) - ->generate(A); + auto Ainv = solver::build() + .with_preconditioner( + schwarz::build().with_local_solver(local_solver)) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(num_iters), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) + .on(exec) + ->generate(A); // Add logger to the generated solver to log the iteration count and // residual norm Ainv->add_logger(logger); diff --git a/examples/external-lib-interfacing/external-lib-interfacing.cpp b/examples/external-lib-interfacing/external-lib-interfacing.cpp index 1766af3001f..04824cb9578 100644 --- a/examples/external-lib-interfacing/external-lib-interfacing.cpp +++ b/examples/external-lib-interfacing/external-lib-interfacing.cpp @@ -880,11 +880,9 @@ void AdvectionProblem::solve() auto solver_gen = bicgstab::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(1000).on(exec), - gko::stop::ResidualNorm<>::build() - 
.with_reduction_factor(1e-12) - .on(exec)) - .with_preconditioner(bj::build().on(exec)) + gko::stop::Iteration::build().with_max_iters(1000), + gko::stop::ResidualNorm<>::build().with_reduction_factor(1e-12)) + .with_preconditioner(bj::build()) .on(exec); auto solver = solver_gen->generate(gko::give(A)); diff --git a/examples/ginkgo-overhead/ginkgo-overhead.cpp b/examples/ginkgo-overhead/ginkgo-overhead.cpp index 5bd90ba0bad..f3f308c495f 100644 --- a/examples/ginkgo-overhead/ginkgo-overhead.cpp +++ b/examples/ginkgo-overhead/ginkgo-overhead.cpp @@ -72,8 +72,7 @@ int main(int argc, char* argv[]) auto cg_factory = cg::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(num_iters).on( - exec)) + gko::stop::Iteration::build().with_max_iters(num_iters)) .on(exec); auto A = gko::initialize({1.0}, exec); auto b = gko::initialize({std::nan("")}, exec); diff --git a/examples/heat-equation/heat-equation.cpp b/examples/heat-equation/heat-equation.cpp index eae87f7e64f..8e69931b250 100644 --- a/examples/heat-equation/heat-equation.cpp +++ b/examples/heat-equation/heat-equation.cpp @@ -192,11 +192,10 @@ int main(int argc, char* argv[]) // stopping at 1e-10 relative accuracy auto solver = gko::solver::Cg<>::build() - .with_preconditioner(gko::preconditioner::Ic<>::build().on(exec)) + .with_preconditioner(gko::preconditioner::Ic<>::build()) .with_criteria(gko::stop::ResidualNorm<>::build() .with_baseline(gko::stop::mode::rhs_norm) - .with_reduction_factor(1e-10) - .on(exec)) + .with_reduction_factor(1e-10)) .on(exec) ->generate(stencil_matrix); // time stamp of the last output frame (initialized to a sentinel value) diff --git a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp index 33946b7de44..acebd9d96ff 100644 --- a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp +++ b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp @@ -114,11 +114,9 @@ int main(int argc, char* argv[]) const RealValueType reduction_factor{1e-7}; auto ilu_gmres_factory = gmres::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1000u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1000u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) .with_generated_preconditioner(ilu_preconditioner) .on(exec); diff --git a/examples/inverse-iteration/inverse-iteration.cpp b/examples/inverse-iteration/inverse-iteration.cpp index 460370b7e00..2b584e0ca4f 100644 --- a/examples/inverse-iteration/inverse-iteration.cpp +++ b/examples/inverse-iteration/inverse-iteration.cpp @@ -118,12 +118,10 @@ int main(int argc, char* argv[]) // Generate solver operator (A - zI)^-1 auto solver = solver_type::build() - .with_criteria(gko::stop::Iteration::build() - .with_max_iters(system_max_iterations) - .on(exec), + .with_criteria(gko::stop::Iteration::build().with_max_iters( + system_max_iterations), gko::stop::ResidualNorm::build() - .with_reduction_factor(system_residual_goal) - .on(exec)) + .with_reduction_factor(system_residual_goal)) .on(exec) ->generate(system_matrix); diff --git a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp index 407a083e548..be7e8261f2c 100644 --- a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp +++ 
b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp @@ -119,18 +119,16 @@ int main(int argc, char* argv[]) auto trisolve_factory = ir::build() .with_solver(bj_factory) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(sweeps).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(sweeps)) .on(exec); // Generate an ILU preconditioner factory by setting lower and upper // triangular solver - in this case the previously defined iterative // refinement method. - auto ilu_pre_factory = - gko::preconditioner::Ilu::build() - .with_l_solver_factory(gko::clone(trisolve_factory)) - .with_u_solver_factory(gko::clone(trisolve_factory)) - .on(exec); + auto ilu_pre_factory = gko::preconditioner::Ilu::build() + .with_l_solver(gko::clone(trisolve_factory)) + .with_u_solver(gko::clone(trisolve_factory)) + .on(exec); // Use incomplete factors to generate ILU preconditioner auto ilu_preconditioner = gko::share(ilu_pre_factory->generate(par_ilu)); diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp index 14384eaab52..711d43049a1 100644 --- a/examples/iterative-refinement/iterative-refinement.cpp +++ b/examples/iterative-refinement/iterative-refinement.cpp @@ -113,19 +113,13 @@ int main(int argc, char* argv[]) RealValueType inner_reduction_factor{1e-2}; auto solver_gen = ir::build() - .with_solver( - cg::build() - .with_criteria( - gko::stop::ResidualNorm::build() - .with_reduction_factor(inner_reduction_factor) - .on(exec)) - .on(exec)) + .with_solver(cg::build().with_criteria( + gko::stop::ResidualNorm::build() + .with_reduction_factor(inner_reduction_factor))) .with_criteria( - gko::stop::Iteration::build().with_max_iters(max_iters).on( - exec), + gko::stop::Iteration::build().with_max_iters(max_iters), gko::stop::ResidualNorm::build() - .with_reduction_factor(outer_reduction_factor) - .on(exec)) + .with_reduction_factor(outer_reduction_factor)) .on(exec); // Create solver auto solver = solver_gen->generate(A); diff --git a/examples/kokkos_assembly/kokkos_assembly.cpp b/examples/kokkos_assembly/kokkos_assembly.cpp index ba579199ee3..88ff261b759 100644 --- a/examples/kokkos_assembly/kokkos_assembly.cpp +++ b/examples/kokkos_assembly/kokkos_assembly.cpp @@ -208,13 +208,11 @@ int main(int argc, char* argv[]) const RealValueType reduction_factor{1e-7}; // Generate solver and solve the system cg::build() - .with_criteria(gko::stop::Iteration::build() - .with_max_iters(discretization_points) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) - .with_preconditioner(bj::build().on(exec)) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(discretization_points), + gko::stop::ResidualNorm::build().with_reduction_factor( + reduction_factor)) + .with_preconditioner(bj::build()) .on(exec) ->generate(A) ->apply(rhs, u); diff --git a/examples/minimal-cuda-solver/minimal-cuda-solver.cpp b/examples/minimal-cuda-solver/minimal-cuda-solver.cpp index 5a7a8c086af..ccbdaadfc41 100644 --- a/examples/minimal-cuda-solver/minimal-cuda-solver.cpp +++ b/examples/minimal-cuda-solver/minimal-cuda-solver.cpp @@ -44,12 +44,10 @@ int main() // Create the solver auto solver = gko::solver::Cg<>::build() - .with_preconditioner(gko::preconditioner::Jacobi<>::build().on(gpu)) + .with_preconditioner(gko::preconditioner::Jacobi<>::build()) .with_criteria( - gko::stop::Iteration::build().with_max_iters(20u).on(gpu), - gko::stop::ResidualNorm<>::build() - 
.with_reduction_factor(1e-15) - .on(gpu)) + gko::stop::Iteration::build().with_max_iters(20u), + gko::stop::ResidualNorm<>::build().with_reduction_factor(1e-15)) .on(gpu); // Solve system solver->generate(give(A))->apply(b, x); diff --git a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp index cef918983e9..3834fa7f33f 100644 --- a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp +++ b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp @@ -139,15 +139,13 @@ int main(int argc, char* argv[]) ir::build() .with_solver(inner_solver_gen) .with_relaxation_factor(static_cast(0.9)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec)); auto smoother_gen_f = gko::share( ir_f::build() .with_solver(inner_solver_gen_f) .with_relaxation_factor(static_cast(0.9)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec)); // Create MultigridLevel factory auto mg_level_gen = @@ -159,15 +157,13 @@ int main(int argc, char* argv[]) ir::build() .with_solver(inner_solver_gen) .with_relaxation_factor(static_cast(0.9)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(exec)); auto coarsest_gen_f = gko::share( ir_f::build() .with_solver(inner_solver_gen_f) .with_relaxation_factor(static_cast(0.9)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(exec)); // Create multigrid factory std::shared_ptr multigrid_gen; @@ -192,8 +188,7 @@ int main(int argc, char* argv[]) .with_coarsest_solver(coarsest_gen_f) .with_default_initial_guess( gko::solver::initial_guess_mode::zero) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec); } else { multigrid_gen = @@ -206,8 +201,7 @@ int main(int argc, char* argv[]) .with_coarsest_solver(coarsest_gen) .with_default_initial_guess( gko::solver::initial_guess_mode::zero) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec); } // Create solver factory diff --git a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp index 4241a74cdf2..33684198c83 100644 --- a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp +++ b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp @@ -125,17 +125,15 @@ int main(int argc, char* argv[]) // Create smoother factory (ir with bj) auto smoother_gen = gko::share( ir::build() - .with_solver(bj::build().with_max_block_size(1u).on(exec)) + .with_solver(bj::build().with_max_block_size(1u)) .with_relaxation_factor(static_cast(0.9)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec)); auto smoother_gen2 = gko::share( ir2::build() - .with_solver(bj2::build().with_max_block_size(1u).on(exec)) + .with_solver(bj2::build().with_max_block_size(1u)) 
.with_relaxation_factor(static_cast(0.9)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec)); // Create RestrictProlong factory auto mg_level_gen = @@ -145,17 +143,15 @@ int main(int argc, char* argv[]) // Create CoarsesSolver factory auto coarsest_solver_gen = gko::share( ir::build() - .with_solver(bj::build().with_max_block_size(1u).on(exec)) + .with_solver(bj::build().with_max_block_size(1u)) .with_relaxation_factor(static_cast(0.9)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(exec)); auto coarsest_solver_gen2 = gko::share( ir2::build() - .with_solver(bj2::build().with_max_block_size(1u).on(exec)) + .with_solver(bj2::build().with_max_block_size(1u)) .with_relaxation_factor(static_cast(0.9)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(exec)); // Create multigrid factory std::shared_ptr multigrid_gen; diff --git a/examples/mixed-precision-ir/mixed-precision-ir.cpp b/examples/mixed-precision-ir/mixed-precision-ir.cpp index 0882d755cdc..0083ca15162 100644 --- a/examples/mixed-precision-ir/mixed-precision-ir.cpp +++ b/examples/mixed-precision-ir/mixed-precision-ir.cpp @@ -124,12 +124,10 @@ int main(int argc, char* argv[]) // Create inner solver auto inner_solver = cg::build() - .with_criteria(gko::stop::ResidualNorm::build() - .with_reduction_factor(inner_reduction_factor) - .on(exec), - gko::stop::Iteration::build() - .with_max_iters(max_inner_iters) - .on(exec)) + .with_criteria( + gko::stop::ResidualNorm::build() + .with_reduction_factor(inner_reduction_factor), + gko::stop::Iteration::build().with_max_iters(max_inner_iters)) .on(exec) ->generate(give(solver_A)); diff --git a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp index f82a603d662..d63dedf486b 100644 --- a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp +++ b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp @@ -130,8 +130,7 @@ int main(int argc, char* argv[]) // iterative refinement with two iterations and an Ic solver. 
auto ic_gen = gko::share( ic::build() - .with_factorization_factory( - gko::factorization::Ic::build().on(exec)) + .with_factorization(gko::factorization::Ic::build()) .on(exec)); auto smoother_gen = gko::share( gko::solver::build_smoother(ic_gen, 2u, static_cast(0.9))); @@ -159,8 +158,7 @@ int main(int argc, char* argv[]) .with_mg_level(mg_level_gen) .with_coarsest_solver(coarsest_gen) .with_default_initial_guess(gko::solver::initial_guess_mode::zero) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec); // Create solver factory auto solver_gen = cg::build() diff --git a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp index b31b7906902..0bb51e6fee9 100644 --- a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp +++ b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp @@ -108,19 +108,16 @@ int main(int argc, char* argv[]) std::shared_ptr multigrid_gen; multigrid_gen = mg::build() - .with_mg_level(pgm::build().with_deterministic(true).on(exec)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_mg_level(pgm::build().with_deterministic(true)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec); const gko::remove_complex tolerance = 1e-8; auto solver_gen = cg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on(exec), - gko::stop::ResidualNorm::build() - .with_baseline(gko::stop::mode::absolute) - .with_reduction_factor(tolerance) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(100u), + gko::stop::ResidualNorm::build() + .with_baseline(gko::stop::mode::absolute) + .with_reduction_factor(tolerance)) .with_preconditioner(multigrid_gen) .on(exec); // Create solver diff --git a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp index 05ee0503a5f..be3cc958baf 100644 --- a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp +++ b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp @@ -282,12 +282,10 @@ void solve_system(const std::string& executor_string, // Generate solver auto solver_gen = cg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(dp_2).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) - .with_preconditioner(bj::build().on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(dp_2), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) + .with_preconditioner(bj::build()) .on(exec); auto solver = solver_gen->generate(gko::give(matrix)); diff --git a/examples/papi-logging/papi-logging.cpp b/examples/papi-logging/papi-logging.cpp index 1ae2ae9ec08..0b26e56dd80 100644 --- a/examples/papi-logging/papi-logging.cpp +++ b/examples/papi-logging/papi-logging.cpp @@ -177,11 +177,9 @@ int main(int argc, char* argv[]) const RealValueType reduction_factor{1e-7}; auto solver_gen = cg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(20u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(20u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) .on(exec); auto solver = 
solver_gen->generate(A); diff --git a/examples/performance-debugging/performance-debugging.cpp b/examples/performance-debugging/performance-debugging.cpp index 5f036728924..cb06ac6cc86 100644 --- a/examples/performance-debugging/performance-debugging.cpp +++ b/examples/performance-debugging/performance-debugging.cpp @@ -416,8 +416,7 @@ int main(int argc, char* argv[]) solver::build() .with_criteria( gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec), + .with_reduction_factor(reduction_factor), gko::stop::Iteration::build().with_max_iters(max_iters).on( exec)) .with_preconditioner(preconditioner::create(exec)) diff --git a/examples/poisson-solver/poisson-solver.cpp b/examples/poisson-solver/poisson-solver.cpp index e16f0b26968..eba163fb281 100644 --- a/examples/poisson-solver/poisson-solver.cpp +++ b/examples/poisson-solver/poisson-solver.cpp @@ -184,13 +184,11 @@ int main(int argc, char* argv[]) const gko::remove_complex reduction_factor = 1e-7; // Generate solver and solve the system cg::build() - .with_criteria(gko::stop::Iteration::build() - .with_max_iters(discretization_points) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) - .with_preconditioner(bj::build().on(exec)) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(discretization_points), + gko::stop::ResidualNorm::build().with_reduction_factor( + reduction_factor)) + .with_preconditioner(bj::build()) .on(exec) ->generate(clone(exec, matrix)) // copy the matrix to the executor ->apply(rhs, u); diff --git a/examples/preconditioned-solver/preconditioned-solver.cpp b/examples/preconditioned-solver/preconditioned-solver.cpp index b64b588c4ef..cb3d34be8bc 100644 --- a/examples/preconditioned-solver/preconditioned-solver.cpp +++ b/examples/preconditioned-solver/preconditioned-solver.cpp @@ -95,14 +95,12 @@ int main(int argc, char* argv[]) // Create solver factory auto solver_gen = cg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(20u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(20u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) // Add preconditioner, these 2 lines are the only // difference from the simple solver example - .with_preconditioner(bj::build().with_max_block_size(8u).on(exec)) + .with_preconditioner(bj::build().with_max_block_size(8u)) .on(exec); // Create solver auto solver = solver_gen->generate(A); diff --git a/examples/preconditioner-export/preconditioner-export.cpp b/examples/preconditioner-export/preconditioner-export.cpp index 81aeece1cb1..16baffc6472 100644 --- a/examples/preconditioner-export/preconditioner-export.cpp +++ b/examples/preconditioner-export/preconditioner-export.cpp @@ -193,13 +193,11 @@ int main(int argc, char* argv[]) auto factory = gko::preconditioner::Ilu, gko::preconditioner::UpperIsai<>>::build() - .with_factorization_factory(fact_factory) - .with_l_solver_factory(gko::preconditioner::LowerIsai<>::build() - .with_sparsity_power(sparsity_power) - .on(exec)) - .with_u_solver_factory(gko::preconditioner::UpperIsai<>::build() - .with_sparsity_power(sparsity_power) - .on(exec)) + .with_factorization(fact_factory) + .with_l_solver(gko::preconditioner::LowerIsai<>::build() + .with_sparsity_power(sparsity_power)) + .with_u_solver(gko::preconditioner::UpperIsai<>::build() + .with_sparsity_power(sparsity_power)) .on(exec); 
auto ilu_isai = try_generate([&] { return factory->generate(mtx); }); output(ilu_isai->get_l_solver()->get_approximate_inverse(), @@ -220,13 +218,11 @@ int main(int argc, char* argv[]) auto factory = gko::preconditioner::Ilu, gko::preconditioner::UpperIsai<>>::build() - .with_factorization_factory(fact_factory) - .with_l_solver_factory(gko::preconditioner::LowerIsai<>::build() - .with_sparsity_power(sparsity_power) - .on(exec)) - .with_u_solver_factory(gko::preconditioner::UpperIsai<>::build() - .with_sparsity_power(sparsity_power) - .on(exec)) + .with_factorization(fact_factory) + .with_l_solver(gko::preconditioner::LowerIsai<>::build() + .with_sparsity_power(sparsity_power)) + .with_u_solver(gko::preconditioner::UpperIsai<>::build() + .with_sparsity_power(sparsity_power)) .on(exec); auto ilu_isai = try_generate([&] { return factory->generate(mtx); }); output(ilu_isai->get_l_solver()->get_approximate_inverse(), @@ -250,13 +246,11 @@ int main(int argc, char* argv[]) auto factory = gko::preconditioner::Ilu, gko::preconditioner::UpperIsai<>>::build() - .with_factorization_factory(fact_factory) - .with_l_solver_factory(gko::preconditioner::LowerIsai<>::build() - .with_sparsity_power(sparsity_power) - .on(exec)) - .with_u_solver_factory(gko::preconditioner::UpperIsai<>::build() - .with_sparsity_power(sparsity_power) - .on(exec)) + .with_factorization(fact_factory) + .with_l_solver(gko::preconditioner::LowerIsai<>::build() + .with_sparsity_power(sparsity_power)) + .with_u_solver(gko::preconditioner::UpperIsai<>::build() + .with_sparsity_power(sparsity_power)) .on(exec); auto ilu_isai = try_generate([&] { return factory->generate(mtx); }); output(ilu_isai->get_l_solver()->get_approximate_inverse(), diff --git a/examples/simple-solver-logging/simple-solver-logging.cpp b/examples/simple-solver-logging/simple-solver-logging.cpp index 02318dd7784..2ef47524612 100644 --- a/examples/simple-solver-logging/simple-solver-logging.cpp +++ b/examples/simple-solver-logging/simple-solver-logging.cpp @@ -136,9 +136,8 @@ int main(int argc, char* argv[]) // Generate solver auto solver_gen = cg::build() - .with_criteria( - residual_criterion, - gko::stop::Iteration::build().with_max_iters(20u).on(exec)) + .with_criteria(residual_criterion, + gko::stop::Iteration::build().with_max_iters(20u)) .on(exec); auto solver = solver_gen->generate(A); diff --git a/examples/simple-solver/simple-solver.cpp b/examples/simple-solver/simple-solver.cpp index 81dc9ee6d74..d80c0633ab8 100644 --- a/examples/simple-solver/simple-solver.cpp +++ b/examples/simple-solver/simple-solver.cpp @@ -130,11 +130,9 @@ int main(int argc, char* argv[]) const RealValueType reduction_factor{1e-7}; auto solver_gen = cg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(20u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(20u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(reduction_factor)) .on(exec); // Generate the solver from the matrix. 
The solver factory built in the // previous step takes a "matrix"(a gko::LinOp to be more general) as an diff --git a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp index 63adfaa5571..f4af38882b0 100644 --- a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp +++ b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp @@ -216,13 +216,11 @@ void solve_system(const std::string& executor_string, // Generate solver auto solver_gen = cg::build() - .with_criteria(gko::stop::Iteration::build() - .with_max_iters(gko::size_type(dp)) - .on(exec), + .with_criteria(gko::stop::Iteration::build().with_max_iters( + gko::size_type(dp)), gko::stop::ResidualNorm::build() - .with_reduction_factor(reduction_factor) - .on(exec)) - .with_preconditioner(bj::build().on(exec)) + .with_reduction_factor(reduction_factor)) + .with_preconditioner(bj::build()) .on(exec); auto solver = solver_gen->generate(gko::give(matrix)); diff --git a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp index 441bc63d22c..3347828a55d 100644 --- a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp +++ b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if GINKGO_BUILD_MPI +#include #include #include #include @@ -93,8 +94,25 @@ class Schwarz /** * Local solver factory. */ - std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( - local_solver_factory, nullptr); + std::shared_ptr local_solver{}; + + parameters_type& with_local_solver( + deferred_factory_parameter solver) + { + this->local_solver_generator = std::move(solver); + return *this; + } + + std::unique_ptr on(std::shared_ptr exec) const + { + auto copy = *this; + copy.local_solver = local_solver_generator.on(exec); + return copy.enable_parameters_type::on( + exec); + } + + private: + deferred_factory_parameter local_solver_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Schwarz, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/reference/test/preconditioner/ilu.cpp b/reference/test/preconditioner/ilu.cpp index ce3ea72725f..92fe8fac8cf 100644 --- a/reference/test/preconditioner/ilu.cpp +++ b/reference/test/preconditioner/ilu.cpp @@ -80,37 +80,29 @@ class Ilu : public ::testing::Test { u_factor(gko::initialize( {{2., 1., 1.}, {0., 4., 1.}, {0., 0., 3.}}, exec)), l_u_composition(Composition::create(l_factor, u_factor)), - l_factory( - l_solver_type::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(10u).on( - exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) - .on(exec)), - u_factory( - u_solver_type::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(10u).on( - exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) - .on(exec)), + l_factory(l_solver_type::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(10u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .on(exec)), + u_factory(u_solver_type::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(10u), + gko::stop::Time::build().with_time_limit( 
+ std::chrono::seconds(6)), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .on(exec)), ilu_pre_factory(ilu_prec_type::build() - .with_l_solver_factory(l_factory) - .with_u_solver_factory(u_factory) + .with_l_solver(l_factory) + .with_u_solver(u_factory) .on(exec)), ilu_rev_pre_factory(ilu_rev_prec_type::build() - .with_l_solver_factory(l_factory) - .with_u_solver_factory(u_factory) + .with_l_solver(l_factory) + .with_u_solver(u_factory) .on(exec)) {} diff --git a/reference/test/preconditioner/isai_kernels.cpp b/reference/test/preconditioner/isai_kernels.cpp index eea171d60fe..86d0f40142a 100644 --- a/reference/test/preconditioner/isai_kernels.cpp +++ b/reference/test/preconditioner/isai_kernels.cpp @@ -82,16 +82,13 @@ class Isai : public ::testing::Test { : exec{gko::ReferenceExecutor::create()}, excess_solver_factory( excess_solver_type::build() - .with_preconditioner( - bj::build().with_max_block_size(16u).on(exec)) + .with_preconditioner(bj::build().with_max_block_size(16u)) .with_criteria( - gko::stop::Iteration::build().with_max_iters(1000u).on( - exec), + gko::stop::Iteration::build().with_max_iters(1000u), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::rhs_norm) .with_reduction_factor( - gko::remove_complex{1e-6}) - .on(exec)) + gko::remove_complex{1e-6})) .on(exec)), a_dense{gko::initialize({{2, 1, 2}, {1, -2, 3}, {-1, 1, 1}}, exec)}, diff --git a/reference/test/reorder/scaled_reordered.cpp b/reference/test/reorder/scaled_reordered.cpp index 8789ded37ca..edadc245b33 100644 --- a/reference/test/reorder/scaled_reordered.cpp +++ b/reference/test/reorder/scaled_reordered.cpp @@ -110,11 +110,9 @@ class ScaledReordered : public ::testing::Test { solver_factory( Bicgstab::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), tol{r::value} { diff --git a/reference/test/solver/bicg_kernels.cpp b/reference/test/solver/bicg_kernels.cpp index e317677b2de..aa27eb4afa3 100644 --- a/reference/test/solver/bicg_kernels.cpp +++ b/reference/test/solver/bicg_kernels.cpp @@ -64,17 +64,14 @@ class Bicg : public ::testing::Test { {{2, -1.0, 0.0}, {-1.0, 2, -1.0}, {0.0, -1.0, 2}}, exec)), stopped{}, non_stopped{}, - bicg_factory( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) - .on(exec)), + bicg_factory(Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(4u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .on(exec)), mtx_big(gko::initialize( {{8828.0, 2673.0, 4150.0, -3139.5, 3829.5, 5856.0}, {2673.0, 10765.5, 1805.0, 73.0, 1966.0, 3919.5}, @@ -86,20 +83,16 @@ class Bicg : public ::testing::Test { bicg_factory_big( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), bicg_factory_big2( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + 
gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ImplicitResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), mtx_non_symmetric(gko::initialize( {{1.0, 2.0, 3.0}, {3.0, 2.0, -1.0}, {0.0, -1.0, 2}}, exec)) diff --git a/reference/test/solver/bicgstab_kernels.cpp b/reference/test/solver/bicgstab_kernels.cpp index ec44b6b6f17..70302e95796 100644 --- a/reference/test/solver/bicgstab_kernels.cpp +++ b/reference/test/solver/bicgstab_kernels.cpp @@ -69,36 +69,29 @@ class Bicgstab : public ::testing::Test { bicgstab_factory( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(8u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), + gko::stop::Iteration::build().with_max_iters(8u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), bicgstab_factory2( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(8u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), + gko::stop::Iteration::build().with_max_iters(8u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), gko::stop::ImplicitResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), bicgstab_factory_precision( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on( - exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), + gko::stop::Iteration::build().with_max_iters(50u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)) { auto small_size = gko::dim<2>{2, 2}; diff --git a/reference/test/solver/cb_gmres_kernels.cpp b/reference/test/solver/cb_gmres_kernels.cpp index 1127d7caff7..e5b933ad82c 100644 --- a/reference/test/solver/cb_gmres_kernels.cpp +++ b/reference/test/solver/cb_gmres_kernels.cpp @@ -77,15 +77,12 @@ class CbGmres : public ::testing::Test { gmres_type::build() .with_storage_precision(storage_prec) .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), + gko::stop::Iteration::build().with_max_iters(100u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(this->reduction_factor()) - .on(exec)) + .with_reduction_factor(this->reduction_factor())) .on(exec)), mtx_big(gko::initialize( {{2295.7, -764.8, 1166.5, 428.9, 291.7, -774.5}, @@ -99,12 +96,10 @@ class CbGmres : public ::testing::Test { gmres_type::build() .with_storage_precision(storage_prec) .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(this->reduction_factor()) - .on(exec)) + .with_reduction_factor(this->reduction_factor())) .on(exec)), mtx_medium( gko::initialize({{-86.40, 153.30, -108.90, 8.60, -61.60}, diff --git a/reference/test/solver/cg_kernels.cpp b/reference/test/solver/cg_kernels.cpp index 
76b8cf55946..c089442488f 100644 --- a/reference/test/solver/cg_kernels.cpp +++ b/reference/test/solver/cg_kernels.cpp @@ -64,18 +64,14 @@ class Cg : public ::testing::Test { {{2, -1.0, 0.0}, {-1.0, 2, -1.0}, {0.0, -1.0, 2}}, exec)), stopped{}, non_stopped{}, - cg_factory( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(400u).on( - exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) - .on(exec)), + cg_factory(Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(400u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .on(exec)), mtx_big(gko::initialize( {{8828.0, 2673.0, 4150.0, -3139.5, 3829.5, 5856.0}, {2673.0, 10765.5, 1805.0, 73.0, 1966.0, 3919.5}, @@ -87,20 +83,16 @@ class Cg : public ::testing::Test { cg_factory_big( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), cg_factory_big2( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ImplicitResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)) { auto small_size = gko::dim<2>{2, 2}; diff --git a/reference/test/solver/cgs_kernels.cpp b/reference/test/solver/cgs_kernels.cpp index 9c3ce2071a7..91c7c1e821b 100644 --- a/reference/test/solver/cgs_kernels.cpp +++ b/reference/test/solver/cgs_kernels.cpp @@ -65,15 +65,12 @@ class Cgs : public ::testing::Test { {{1.0, -3.0, 0.0}, {-4.0, 1.0, -3.0}, {2.0, -1.0, 2.0}}, exec)), stopped{}, non_stopped{}, - cgs_factory( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(40u).on( - exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) - .on(exec)), + cgs_factory(Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(40u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .on(exec)), mtx_big( gko::initialize({{-99.0, 87.0, -67.0, -62.0, -68.0, -19.0}, {-30.0, -17.0, -1.0, 9.0, 23.0, 77.0}, @@ -85,20 +82,16 @@ class Cgs : public ::testing::Test { cgs_factory_big( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), cgs_factory_big2( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ImplicitResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)) { auto small_size = gko::dim<2>{2, 2}; diff --git a/reference/test/solver/direct.cpp b/reference/test/solver/direct.cpp index 617015bac1f..f69846b548d 100644 --- a/reference/test/solver/direct.cpp +++ b/reference/test/solver/direct.cpp @@ -77,8 +77,7 @@ class Direct : public ::testing::Test { .with_factorization( gko::experimental::factorization::Lu::build() - 
.with_symmetric_sparsity(true) - .on(exec)) + .with_symmetric_sparsity(true)) .on(exec); solver = factory->generate(mtx); std::normal_distribution> dist(0, 1); diff --git a/reference/test/solver/fcg_kernels.cpp b/reference/test/solver/fcg_kernels.cpp index e8163752689..3dd4149405e 100644 --- a/reference/test/solver/fcg_kernels.cpp +++ b/reference/test/solver/fcg_kernels.cpp @@ -65,17 +65,14 @@ class Fcg : public ::testing::Test { {{2, -1.0, 0.0}, {-1.0, 2, -1.0}, {0.0, -1.0, 2}}, exec)), stopped{}, non_stopped{}, - fcg_factory( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) - .on(exec)), + fcg_factory(Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(4u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .on(exec)), mtx_big(gko::initialize( {{8828.0, 2673.0, 4150.0, -3139.5, 3829.5, 5856.0}, {2673.0, 10765.5, 1805.0, 73.0, 1966.0, 3919.5}, @@ -87,20 +84,16 @@ class Fcg : public ::testing::Test { fcg_factory_big( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), fcg_factory_big2( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ImplicitResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)) { auto small_size = gko::dim<2>{2, 2}; diff --git a/reference/test/solver/gcr_kernels.cpp b/reference/test/solver/gcr_kernels.cpp index 888cbc3b4fe..adf5c35fd1d 100644 --- a/reference/test/solver/gcr_kernels.cpp +++ b/reference/test/solver/gcr_kernels.cpp @@ -72,18 +72,15 @@ class Gcr : public ::testing::Test { non_stopped{}, mtx(gko::initialize( {{1.0, 2.0, 3.0}, {3.0, 2.0, -1.0}, {0.0, -1.0, 2}}, exec)), - gcr_factory( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) - .with_krylov_dim(3u) - .on(exec)), + gcr_factory(Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(4u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .with_krylov_dim(3u) + .on(exec)), mtx_big(gko::initialize( {{2295.7, -764.8, 1166.5, 428.9, 291.7, -774.5}, {2752.6, -1127.7, 1212.8, -299.1, 987.7, 786.8}, @@ -95,20 +92,16 @@ class Gcr : public ::testing::Test { gcr_factory_big( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), gcr_factory_big2( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ImplicitResidualNorm::build() - 
.with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), mtx_medium( gko::initialize({{-86.40, 153.30, -108.90, 8.60, -61.60}, diff --git a/reference/test/solver/gmres_kernels.cpp b/reference/test/solver/gmres_kernels.cpp index 585fec833bc..a99400e412b 100644 --- a/reference/test/solver/gmres_kernels.cpp +++ b/reference/test/solver/gmres_kernels.cpp @@ -76,13 +76,11 @@ class Gmres : public ::testing::Test { gmres_factory( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), + gko::stop::Iteration::build().with_max_iters(4u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .with_krylov_dim(3u) .on(exec)), mtx_big(gko::initialize( @@ -96,20 +94,16 @@ class Gmres : public ::testing::Test { gmres_factory_big( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), gmres_factory_big2( Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - exec), + gko::stop::Iteration::build().with_max_iters(100u), gko::stop::ImplicitResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), mtx_medium( gko::initialize({{-86.40, 153.30, -108.90, 8.60, -61.60}, diff --git a/reference/test/solver/idr_kernels.cpp b/reference/test/solver/idr_kernels.cpp index 3e74e0c319b..da1b73a035c 100644 --- a/reference/test/solver/idr_kernels.cpp +++ b/reference/test/solver/idr_kernels.cpp @@ -62,30 +62,24 @@ class Idr : public ::testing::Test { : exec(gko::ReferenceExecutor::create()), mtx(gko::initialize( {{1.0, -3.0, 0.0}, {-4.0, 1.0, -3.0}, {2.0, -1.0, 2.0}}, exec)), - idr_factory( - Solver::build() - .with_deterministic(true) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(8u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) - .on(exec)), + idr_factory(Solver::build() + .with_deterministic(true) + .with_criteria( + gko::stop::Iteration::build().with_max_iters(8u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .on(exec)), idr_factory_precision( Solver::build() .with_deterministic(true) .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on( - exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), + gko::stop::Iteration::build().with_max_iters(50u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)) {} diff --git a/reference/test/solver/ir_kernels.cpp b/reference/test/solver/ir_kernels.cpp index 8b4255b72ef..fc0c130aa83 100644 --- a/reference/test/solver/ir_kernels.cpp +++ b/reference/test/solver/ir_kernels.cpp @@ -65,15 +65,12 @@ class Ir : public ::testing::Test { // Eigenvalues of mtx are 0.9, 1.0 and 1.1 // Richardson iteration, converges since // | relaxation_factor * 
lambda - 1 | < 1 - ir_factory( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(30u).on( - exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(exec)) - .on(exec)) + ir_factory(Solver::build() + .with_criteria( + gko::stop::Iteration::build().with_max_iters(30u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .on(exec)) {} std::shared_ptr exec; diff --git a/reference/test/solver/multigrid_kernels.cpp b/reference/test/solver/multigrid_kernels.cpp index 3efb9d41c5e..23307d20b33 100644 --- a/reference/test/solver/multigrid_kernels.cpp +++ b/reference/test/solver/multigrid_kernels.cpp @@ -289,30 +289,26 @@ class Multigrid : public ::testing::Test { .on(exec)), smoother_factory(gko::give( Smoother::build() - .with_solver( - InnerSolver::build().with_max_block_size(1u).on(exec)) + .with_solver(InnerSolver::build().with_max_block_size(1u)) .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + gko::stop::Iteration::build().with_max_iters(1u)) .on(exec))), coarsest_factory( CoarsestSolver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), + gko::stop::Iteration::build().with_max_iters(4u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .on(exec)), coarsestnext_factory( CoarsestNextSolver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec)) + gko::stop::Iteration::build().with_max_iters(4u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6))) .on(exec)), rp_factory(DummyRPFactory::build().on(exec)), lo_factory(DummyFactory::build().on(exec)), @@ -357,14 +353,12 @@ class Multigrid : public ::testing::Test { .with_mid_case(gko::solver::multigrid::mid_smooth_type::both) .with_mg_level(coarse_factory) .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(exec), + gko::stop::Iteration::build().with_max_iters(4u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .with_cycle(cycle) .with_min_coarse_rows(1u) .on(exec)); @@ -382,14 +376,12 @@ class Multigrid : public ::testing::Test { .with_mid_case(gko::solver::multigrid::mid_smooth_type::both) .with_mg_level(coarse_factory, coarsenext_factory) .with_criteria( - gko::stop::Iteration::build().with_max_iters(200u).on(exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(100)) - .on(exec), + gko::stop::Iteration::build().with_max_iters(200u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(100)), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(r::value) - .on(exec)) + .with_reduction_factor(r::value)) .with_cycle(cycle) .with_min_coarse_rows(1u) .on(exec)); diff --git a/test/mpi/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp index 3c9e3a8d69f..8d07ba44046 100644 --- a/test/mpi/preconditioner/schwarz.cpp +++ 
b/test/mpi/preconditioner/schwarz.cpp @@ -197,7 +197,7 @@ TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolver) cg::build() .with_preconditioner( prec::build() - .with_local_solver_factory(this->local_solver_factory) + .with_local_solver(this->local_solver_factory) .on(this->exec)) .with_criteria(iter_stop, tol_stop) .on(this->exec); @@ -225,10 +225,9 @@ TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditioner) using cg = typename TestFixture::solver_type; using prec = typename TestFixture::dist_prec_type; - auto precond_factory = - prec::build() - .with_local_solver_factory(this->local_solver_factory) - .on(this->exec); + auto precond_factory = prec::build() + .with_local_solver(this->local_solver_factory) + .on(this->exec); auto local_precond = this->local_solver_factory->generate(this->non_dist_mat); auto precond = precond_factory->generate(this->dist_mat); @@ -249,10 +248,9 @@ TYPED_TEST(SchwarzPreconditioner, CanAdvancedApplyPreconditioner) using cg = typename TestFixture::solver_type; using prec = typename TestFixture::dist_prec_type; - auto precond_factory = - prec::build() - .with_local_solver_factory(this->local_solver_factory) - .on(this->exec); + auto precond_factory = prec::build() + .with_local_solver(this->local_solver_factory) + .on(this->exec); auto local_precond = this->local_solver_factory->generate(this->non_dist_mat); auto precond = precond_factory->generate(this->dist_mat); diff --git a/test/mpi/solver/solver.cpp b/test/mpi/solver/solver.cpp index 59462a9be59..f53b2784124 100644 --- a/test/mpi/solver/solver.cpp +++ b/test/mpi/solver/solver.cpp @@ -107,9 +107,7 @@ struct SimpleSolverTest { std::shared_ptr exec) { return solver_type::build().with_criteria( - gko::stop::Iteration::build() - .with_max_iters(iteration_count()) - .on(exec), + gko::stop::Iteration::build().with_max_iters(iteration_count()), gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::absolute) .with_reduction_factor(reduction_factor()) @@ -164,17 +162,11 @@ struct Ir : SimpleSolverTest> { std::shared_ptr exec) { return SimpleSolverTest>::build(exec) - .with_solver( - gko::solver::Cg::build() - .with_criteria( - gko::stop::Iteration::build() - .with_max_iters(iteration_count()) - .on(exec), - gko::stop::ResidualNorm::build() - .with_baseline(gko::stop::mode::absolute) - .with_reduction_factor(2 * reduction_factor()) - .on(exec)) - .on(exec)) + .with_solver(gko::solver::Cg::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(iteration_count()), + gko::stop::ResidualNorm::build() + .with_baseline(gko::stop::mode::absolute) + .with_reduction_factor(2 * reduction_factor()))) .with_relaxation_factor(0.9); } }; diff --git a/test/solver/bicg_kernels.cpp b/test/solver/bicg_kernels.cpp index a62ab3f6d72..d35e6de227d 100644 --- a/test/solver/bicg_kernels.cpp +++ b/test/solver/bicg_kernels.cpp @@ -239,19 +239,15 @@ TEST_F(Bicg, ApplyWithSpdMatrixIsEquivalentToRef) auto d_b = gko::clone(exec, b); auto bicg_factory = gko::solver::Bicg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(ref), - gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(ref)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(50u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(::r::value)) .on(ref); auto d_bicg_factory = gko::solver::Bicg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(exec)) + 
.with_criteria(gko::stop::Iteration::build().with_max_iters(50u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(::r::value)) .on(exec); auto solver = bicg_factory->generate(std::move(mtx)); auto d_solver = d_bicg_factory->generate(std::move(d_mtx)); @@ -271,19 +267,15 @@ TEST_F(Bicg, ApplyWithSuiteSparseMatrixIsEquivalentToRef) auto d_b = gko::clone(exec, b); auto bicg_factory = gko::solver::Bicg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(ref), - gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(ref)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(50u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(::r::value)) .on(ref); auto d_bicg_factory = gko::solver::Bicg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(50u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(::r::value)) .on(exec); auto solver = bicg_factory->generate(std::move(mtx_ani)); auto d_solver = d_bicg_factory->generate(std::move(d_mtx_ani)); diff --git a/test/solver/bicgstab_kernels.cpp b/test/solver/bicgstab_kernels.cpp index 15eda2a74cb..422d51c86ad 100644 --- a/test/solver/bicgstab_kernels.cpp +++ b/test/solver/bicgstab_kernels.cpp @@ -71,19 +71,17 @@ class Bicgstab : public CommonTestFixture { exec_bicgstab_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(exec), + gko::stop::Iteration::build().with_max_iters(246u), gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(exec)) + .with_reduction_factor(::r::value)) .on(exec); ref_bicgstab_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(ref), + gko::stop::Iteration::build().with_max_iters(246u), gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(ref)) + .with_reduction_factor(::r::value)) .on(ref); } diff --git a/test/solver/cg_kernels.cpp b/test/solver/cg_kernels.cpp index a51ac48c59b..dcb4b0147f6 100644 --- a/test/solver/cg_kernels.cpp +++ b/test/solver/cg_kernels.cpp @@ -203,19 +203,15 @@ TEST_F(Cg, ApplyIsEquivalentToRef) auto d_b = gko::clone(exec, b); auto cg_factory = gko::solver::Cg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(ref), - gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(ref)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(50u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(::r::value)) .on(ref); auto d_cg_factory = gko::solver::Cg::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(50u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(::r::value)) .on(exec); auto solver = cg_factory->generate(std::move(mtx)); auto d_solver = d_cg_factory->generate(std::move(d_mtx)); diff --git a/test/solver/cgs_kernels.cpp b/test/solver/cgs_kernels.cpp index b1b124ed420..35914d4afa6 100644 --- a/test/solver/cgs_kernels.cpp +++ b/test/solver/cgs_kernels.cpp @@ -70,18 +70,16 @@ class Cgs : public CommonTestFixture { exec_cgs_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(exec), + 
gko::stop::Iteration::build().with_max_iters(246u), gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(exec)) + .with_reduction_factor(::r::value)) .on(exec); ref_cgs_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(ref), + gko::stop::Iteration::build().with_max_iters(246u), gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(ref)) + .with_reduction_factor(::r::value)) .on(ref); } diff --git a/test/solver/direct.cpp b/test/solver/direct.cpp index 0a30f7ba67f..31b7bd976ce 100644 --- a/test/solver/direct.cpp +++ b/test/solver/direct.cpp @@ -93,22 +93,22 @@ class Direct : public CommonTestFixture { mtx = gko::read(s_mtx, ref); dmtx = gko::clone(exec, mtx); const auto num_rows = mtx->get_size()[0]; - factory = solver_type::build() - .with_factorization(factorization_type::build() - .with_symmetric_sparsity(true) - .on(ref)) - .with_num_rhs(static_cast(nrhs)) - .on(ref); + factory = + solver_type::build() + .with_factorization( + factorization_type::build().with_symmetric_sparsity(true)) + .with_num_rhs(static_cast(nrhs)) + .on(ref); alpha = gen_mtx(1, 1); beta = gen_mtx(1, 1); input = gen_mtx(num_rows, nrhs); output = gen_mtx(num_rows, nrhs); - dfactory = solver_type::build() - .with_factorization(factorization_type::build() - .with_symmetric_sparsity(true) - .on(exec)) - .with_num_rhs(static_cast(nrhs)) - .on(exec); + dfactory = + solver_type::build() + .with_factorization( + factorization_type::build().with_symmetric_sparsity(true)) + .with_num_rhs(static_cast(nrhs)) + .on(exec); dalpha = gko::clone(exec, alpha); dbeta = gko::clone(exec, beta); dinput = gko::clone(exec, input); diff --git a/test/solver/fcg_kernels.cpp b/test/solver/fcg_kernels.cpp index 0d1ced86f85..d8a3a1ef9b2 100644 --- a/test/solver/fcg_kernels.cpp +++ b/test/solver/fcg_kernels.cpp @@ -212,19 +212,15 @@ TEST_F(Fcg, ApplyIsEquivalentToRef) auto d_b = gko::clone(exec, b); auto fcg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(ref), - gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(ref)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(50u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(::r::value)) .on(ref); auto d_fcg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(50u).on(exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(::r::value) - .on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(50u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(::r::value)) .on(exec); auto solver = fcg_factory->generate(std::move(mtx)); auto d_solver = d_fcg_factory->generate(std::move(d_mtx)); diff --git a/test/solver/gcr_kernels.cpp b/test/solver/gcr_kernels.cpp index 8f02c431f98..8db5570a6f0 100644 --- a/test/solver/gcr_kernels.cpp +++ b/test/solver/gcr_kernels.cpp @@ -74,19 +74,17 @@ class Gcr : public CommonTestFixture { exec_gcr_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(exec), + gko::stop::Iteration::build().with_max_iters(246u), gko::stop::ResidualNorm::build() - .with_reduction_factor(value_type{1e-15}) - .on(exec)) + .with_reduction_factor(value_type{1e-15})) .on(exec); ref_gcr_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(ref), + gko::stop::Iteration::build().with_max_iters(246u), gko::stop::ResidualNorm::build() - 
.with_reduction_factor(value_type{1e-15}) - .on(ref)) + .with_reduction_factor(value_type{1e-15})) .on(ref); } diff --git a/test/solver/gmres_kernels.cpp b/test/solver/gmres_kernels.cpp index 5c2541da1a7..7752ff4dda6 100644 --- a/test/solver/gmres_kernels.cpp +++ b/test/solver/gmres_kernels.cpp @@ -70,19 +70,17 @@ class Gmres : public CommonTestFixture { exec_gmres_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(exec), + gko::stop::Iteration::build().with_max_iters(246u), gko::stop::ResidualNorm::build() - .with_reduction_factor(value_type{1e-15}) - .on(exec)) + .with_reduction_factor(value_type{1e-15})) .on(exec); ref_gmres_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(246u).on(ref), + gko::stop::Iteration::build().with_max_iters(246u), gko::stop::ResidualNorm::build() - .with_reduction_factor(value_type{1e-15}) - .on(ref)) + .with_reduction_factor(value_type{1e-15})) .on(ref); } diff --git a/test/solver/idr_kernels.cpp b/test/solver/idr_kernels.cpp index 959c857cb71..0019c05b9d4 100644 --- a/test/solver/idr_kernels.cpp +++ b/test/solver/idr_kernels.cpp @@ -76,15 +76,13 @@ class Idr : public CommonTestFixture { exec_idr_factory = Solver::build() .with_deterministic(true) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec); ref_idr_factory = Solver::build() .with_deterministic(true) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(ref); } @@ -295,15 +293,13 @@ TEST_F(Idr, IdrIterationWithComplexSubspaceOneRHSIsEquivalentToRef) Solver::build() .with_deterministic(true) .with_complex_subspace(true) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec); ref_idr_factory = Solver::build() .with_deterministic(true) .with_complex_subspace(true) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(ref); auto ref_solver = ref_idr_factory->generate(mtx); auto exec_solver = exec_idr_factory->generate(d_mtx); @@ -337,15 +333,13 @@ TEST_F(Idr, IdrIterationWithComplexSubspaceMultipleRHSIsEquivalentToRef) Solver::build() .with_deterministic(true) .with_complex_subspace(true) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec); ref_idr_factory = Solver::build() .with_deterministic(true) .with_complex_subspace(true) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(ref)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(ref); auto exec_solver = exec_idr_factory->generate(d_mtx); auto ref_solver = ref_idr_factory->generate(mtx); diff --git a/test/solver/ir_kernels.cpp b/test/solver/ir_kernels.cpp index c21f6da3f66..81464036c69 100644 --- a/test/solver/ir_kernels.cpp +++ b/test/solver/ir_kernels.cpp @@ -105,13 +105,11 @@ TEST_F(Ir, ApplyIsEquivalentToRef) // both executors auto ir_factory = gko::solver::Ir::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .on(ref); auto d_ir_factory = gko::solver::Ir::build() - .with_criteria( - 
gko::stop::Iteration::build().with_max_iters(2u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .on(exec); auto solver = ir_factory->generate(std::move(mtx)); auto d_solver = d_ir_factory->generate(std::move(d_mtx)); @@ -134,25 +132,15 @@ TEST_F(Ir, ApplyWithIterativeInnerSolverIsEquivalentToRef) auto ir_factory = gko::solver::Ir::build() - .with_solver( - gko::solver::Gmres::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - ref)) - .on(ref)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) + .with_solver(gko::solver::Gmres::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref))) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .on(ref); auto d_ir_factory = gko::solver::Ir::build() - .with_solver( - gko::solver::Gmres::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - exec)) - .on(exec)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(exec)) + .with_solver(gko::solver::Gmres::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(exec))) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .on(exec); auto solver = ir_factory->generate(std::move(mtx)); auto d_solver = d_ir_factory->generate(std::move(d_mtx)); @@ -180,14 +168,12 @@ TEST_F(Ir, RichardsonApplyIsEquivalentToRef) // both executors auto ir_factory = gko::solver::Ir::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .with_relaxation_factor(value_type{0.9}) .on(ref); auto d_ir_factory = gko::solver::Ir::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .with_relaxation_factor(value_type{0.9}) .on(exec); auto solver = ir_factory->generate(std::move(mtx)); @@ -210,26 +196,16 @@ TEST_F(Ir, RichardsonApplyWithIterativeInnerSolverIsEquivalentToRef) auto d_b = clone(exec, b); auto ir_factory = gko::solver::Ir::build() - .with_solver( - gko::solver::Gmres::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - ref)) - .on(ref)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) + .with_solver(gko::solver::Gmres::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(ref))) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .with_relaxation_factor(value_type{0.9}) .on(ref); auto d_ir_factory = gko::solver::Ir::build() - .with_solver( - gko::solver::Gmres::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - exec)) - .on(exec)) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(exec)) + .with_solver(gko::solver::Gmres::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(1u).on(exec))) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .with_relaxation_factor(value_type{0.9}) .on(exec); auto solver = ir_factory->generate(std::move(mtx)); @@ -258,14 +234,12 @@ TEST_F(Ir, ApplyWithGivenInitialGuessModeIsEquivalentToRef) auto d_x = clone(exec, x); auto ir_factory = gko::solver::Ir::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(ref)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .with_default_initial_guess(guess) .on(ref); auto d_ir_factory = gko::solver::Ir::build() - .with_criteria( - 
gko::stop::Iteration::build().with_max_iters(2u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .with_default_initial_guess(guess) .on(exec); auto solver = ir_factory->generate(mtx); diff --git a/test/solver/solver.cpp b/test/solver/solver.cpp index b6f228c13f5..6bb59507f17 100644 --- a/test/solver/solver.cpp +++ b/test/solver/solver.cpp @@ -101,9 +101,7 @@ struct SimpleSolverTest { gko::size_type iteration_count, bool check_residual = true) { return solver_type::build().with_criteria( - gko::stop::Iteration::build() - .with_max_iters(iteration_count) - .on(exec), + gko::stop::Iteration::build().with_max_iters(iteration_count), check_residual ? gko::stop::ResidualNorm::build() .with_baseline(gko::stop::mode::absolute) .with_reduction_factor(1e-30) @@ -116,8 +114,7 @@ struct SimpleSolverTest { gko::size_type iteration_count, bool check_residual = true) { return build(exec, iteration_count, check_residual) - .with_preconditioner( - precond_type::build().with_max_block_size(1u).on(exec)); + .with_preconditioner(precond_type::build().with_max_block_size(1u)); } static const gko::LinOp* get_preconditioner( @@ -185,8 +182,7 @@ struct Idr : SimpleSolverTest> { gko::size_type iteration_count, bool check_residual = true) { return build(exec, iteration_count, check_residual) - .with_preconditioner( - precond_type::build().with_max_block_size(1u).on(exec)); + .with_preconditioner(precond_type::build().with_max_block_size(1u)); } }; @@ -200,8 +196,7 @@ struct Ir : SimpleSolverTest> { { return SimpleSolverTest>::build( exec, iteration_count, check_residual) - .with_solver( - precond_type::build().with_max_block_size(1u).on(exec)); + .with_solver(precond_type::build().with_max_block_size(1u)); } static const gko::LinOp* get_preconditioner( @@ -232,8 +227,7 @@ struct CbGmres : SimpleSolverTest> { gko::size_type iteration_count, bool check_residual = true) { return build(exec, iteration_count, check_residual) - .with_preconditioner( - precond_type::build().with_max_block_size(1u).on(exec)); + .with_preconditioner(precond_type::build().with_max_block_size(1u)); } }; @@ -254,8 +248,7 @@ struct Gmres : SimpleSolverTest> { gko::size_type iteration_count, bool check_residual = true) { return build(exec, iteration_count, check_residual) - .with_preconditioner( - precond_type::build().with_max_block_size(1u).on(exec)); + .with_preconditioner(precond_type::build().with_max_block_size(1u)); } }; @@ -277,8 +270,7 @@ struct FGmres : SimpleSolverTest> { gko::size_type iteration_count, bool check_residual = true) { return build(exec, iteration_count, check_residual) - .with_preconditioner( - precond_type::build().with_max_block_size(1u).on(exec)) + .with_preconditioner(precond_type::build().with_max_block_size(1u)) .with_flexible(true); } }; @@ -300,8 +292,7 @@ struct Gcr : SimpleSolverTest> { gko::size_type iteration_count, bool check_residual = true) { return build(exec, iteration_count, check_residual) - .with_preconditioner( - precond_type::build().with_max_block_size(1u).on(exec)); + .with_preconditioner(precond_type::build().with_max_block_size(1u)); } }; diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index d2c273b4e0f..d442647a985 100644 --- a/test/test_install/test_install.cpp +++ b/test/test_install/test_install.cpp @@ -104,11 +104,9 @@ void check_solver(std::shared_ptr exec, auto solver_gen = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(num_iters).on( - exec), - gko::stop::ResidualNorm<>::build() - 
.with_reduction_factor(reduction_factor) - .on(exec)) + gko::stop::Iteration::build().with_max_iters(num_iters), + gko::stop::ResidualNorm<>::build().with_reduction_factor( + reduction_factor)) .on(exec); #if HAS_REFERENCE A->read(A_raw); @@ -126,11 +124,9 @@ void check_solver(std::shared_ptr exec, auto solver_gen_ref = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(num_iters).on( - exec_ref), - gko::stop::ResidualNorm<>::build() - .with_reduction_factor(reduction_factor) - .on(exec_ref)) + gko::stop::Iteration::build().with_max_iters(num_iters), + gko::stop::ResidualNorm<>::build().with_reduction_factor( + reduction_factor)) .on(exec_ref); auto x_ref = gko::clone(exec_ref, x); solver_gen->generate(A_ref)->apply(b, x_ref); @@ -493,8 +489,7 @@ int main() using Solver = gko::solver::Ir<>; auto test = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(exec); } From fe24ad4f332b6188f337f87f02fefc96bbb4f42e Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 14 Aug 2023 17:17:31 +0200 Subject: [PATCH 311/583] review updates - move parameter macros to abstract_factory.hpp - use macros for defining deferred parameters Co-authored-by: Yuhsiang M. Tsai --- include/ginkgo/core/base/abstract_factory.hpp | 203 ++++++++++++++++++ include/ginkgo/core/base/lin_op.hpp | 124 ----------- .../distributed/preconditioner/schwarz.hpp | 14 +- include/ginkgo/core/solver/direct.hpp | 38 +--- include/ginkgo/core/solver/ir.hpp | 32 +-- include/ginkgo/core/solver/multigrid.hpp | 89 ++------ 6 files changed, 227 insertions(+), 273 deletions(-) diff --git a/include/ginkgo/core/base/abstract_factory.hpp b/include/ginkgo/core/base/abstract_factory.hpp index e8ec803b480..e644bcdcd76 100644 --- a/include/ginkgo/core/base/abstract_factory.hpp +++ b/include/ginkgo/core/base/abstract_factory.hpp @@ -274,13 +274,26 @@ class enable_parameters_type { }; +/** + * Represents a factory parameter of factory type that can either initialized by + * a pre-existing factory or by passing in a factory_parameters object whose + * `.on(exec)` will be called to instantiate a factory. + * + * @tparam FactoryType the type of factory that can be instantiated from this + * object. + */ template class deferred_factory_parameter { public: deferred_factory_parameter() = default; + /** Creates an empty deferred factory parameter. */ deferred_factory_parameter(std::nullptr_t) {} + /** + * Creates a deferred factory parameter from a preexisting factory with + * shared ownership. + */ template ) { return factory; }; } + /** + * Creates a deferred factory parameter by taking ownership of a + * preexisting factory with unique ownership. + */ template ) { return factory; }; } + /** + * Creates a deferred factory parameter object from a + * factory_parameters-like object. To instantiate the actual factory, the + * parameter's `.on(exec)` function will be called. + */ template ().on( std::shared_ptr{}))> @@ -315,6 +337,7 @@ class deferred_factory_parameter { }; } + /** Instantiates the deferred parameter into an actual factory. */ std::shared_ptr on( std::shared_ptr exec) const { @@ -324,6 +347,7 @@ class deferred_factory_parameter { return generator_(exec); } + /** Returns true iff the parameter contains a factory. 
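 *
 * A minimal usage sketch of this class (assuming an executor `exec`):
 *
 *     deferred_factory_parameter<LinOpFactory> deferred =
 *         solver::Cg<>::build();                  // from factory parameters
 *     deferred_factory_parameter<LinOpFactory> ready =
 *         solver::Cg<>::build().on(exec);         // from a pre-built factory
 *     deferred_factory_parameter<LinOpFactory> empty = nullptr;
 *
 *     auto factory = deferred.on(exec);  // instantiates the actual factory
 *
 * Here `deferred` and `ready` contain a factory, so this operator returns
 * true for them, while it returns false for `empty`.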
*/ explicit operator bool() const { return bool(generator_); } private: @@ -333,6 +357,185 @@ class deferred_factory_parameter { }; +/** + * Defines a build method for the factory, simplifying its construction by + * removing the repetitive typing of factory's name. + * + * @param _factory_name the factory for which to define the method + * + * @ingroup LinOp + */ +#define GKO_ENABLE_BUILD_METHOD(_factory_name) \ + static auto build()->decltype(_factory_name::create()) \ + { \ + return _factory_name::create(); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + +#if !(defined(__CUDACC__) || defined(__HIPCC__)) +/** + * Creates a factory parameter in the factory parameters structure. + * + * @param _name name of the parameter + * @param __VA_ARGS__ default value of the parameter + * + * @see GKO_ENABLE_LIN_OP_FACTORY for more details, and usage example + * + * @deprecated Use GKO_FACTORY_PARAMETER_SCALAR or GKO_FACTORY_PARAMETER_VECTOR + * + * @ingroup LinOp + */ +#define GKO_FACTORY_PARAMETER(_name, ...) \ + mutable _name{__VA_ARGS__}; \ + \ + template \ + auto with_##_name(Args&&... _value)->std::decay_t& \ + { \ + using type = decltype(this->_name); \ + this->_name = type{std::forward(_value)...}; \ + return *this; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +/** + * Creates a scalar factory parameter in the factory parameters structure. + * + * Scalar in this context means that the constructor for this type only takes + * a single parameter. + * + * @param _name name of the parameter + * @param _default default value of the parameter + * + * @see GKO_ENABLE_LIN_OP_FACTORY for more details, and usage example + * + * @ingroup LinOp + */ +#define GKO_FACTORY_PARAMETER_SCALAR(_name, _default) \ + GKO_FACTORY_PARAMETER(_name, _default) + +/** + * Creates a vector factory parameter in the factory parameters structure. + * + * Vector in this context means that the constructor for this type takes + * multiple parameters. + * + * @param _name name of the parameter + * @param _default default value of the parameter + * + * @see GKO_ENABLE_LIN_OP_FACTORY for more details, and usage example + * + * @ingroup LinOp + */ +#define GKO_FACTORY_PARAMETER_VECTOR(_name, ...) \ + GKO_FACTORY_PARAMETER(_name, __VA_ARGS__) +#else // defined(__CUDACC__) || defined(__HIPCC__) +// A workaround for the NVCC compiler - parameter pack expansion does not work +// properly, because while the assignment to a scalar value is translated by +// cudafe into a C-style cast, the parameter pack expansion is not removed and +// `Args&&... args` is still kept as a parameter pack. +#define GKO_FACTORY_PARAMETER(_name, ...) \ + mutable _name{__VA_ARGS__}; \ + \ + template \ + auto with_##_name(Args&&... _value)->std::decay_t& \ + { \ + GKO_NOT_IMPLEMENTED; \ + return *this; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +#define GKO_FACTORY_PARAMETER_SCALAR(_name, _default) \ + mutable _name{_default}; \ + \ + template \ + auto with_##_name(Arg&& _value)->std::decay_t& \ + { \ + using type = decltype(this->_name); \ + this->_name = type{std::forward(_value)}; \ + return *this; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +#define GKO_FACTORY_PARAMETER_VECTOR(_name, ...) 
\ + mutable _name{__VA_ARGS__}; \ + \ + template \ + auto with_##_name(Args&&... _value)->std::decay_t& \ + { \ + using type = decltype(this->_name); \ + this->_name = type{std::forward(_value)...}; \ + return *this; \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") +#endif // defined(__CUDACC__) || defined(__HIPCC__) + +/** + * Creates a factory parameter of factory type. The parameter can either be set + * directly, or its creation can be deferred until the executor is set in the + * `.on(exec)` function call, by using a deferred_factory_parameter. + * + * @param _name name of the parameter + * @param _type pointee type of the parameter, e.g. LinOpFactory + * + */ +#define GKO_DEFERRED_FACTORY_PARAMETER(_name, _type) \ +public: \ + std::shared_ptr _name{}; \ + parameters_type& with_##_name(deferred_factory_parameter<_type> factory) \ + { \ + this->_name##_generator_ = std::move(factory); \ + return *this; \ + } \ + \ +private: \ + deferred_factory_parameter<_type> _name##_generator_; \ + \ +public: \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + +/** + * Creates a factory parameter representing a vector of factories type. The + * parameter can either be set directly, or its creation can be deferred until + * the executor is set in the + * `.on(exec)` function call, by using a vector of deferred_factory_parameters. + * + * @param _name name of the parameter + * @param _type pointee type of the vector entries, e.g. LinOpFactory + * + */ +#define GKO_DEFERRED_FACTORY_VECTOR_PARAMETER(_name, _type) \ +public: \ + std::vector> _name{}; \ + template \ + parameters_type& with_##_name(Args&&... factories) \ + { \ + this->_name##_generator_ = {deferred_factory_parameter<_type>{ \ + std::forward(factories)}...}; \ + return *this; \ + } \ + \ +private: \ + std::vector> _name##_generator_; \ + \ +public: \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + } // namespace gko diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index 20d7771822f..e2660baff2e 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -1084,130 +1084,6 @@ public: \ "semi-colon warnings") -/** - * Defines a build method for the factory, simplifying its construction by - * removing the repetitive typing of factory's name. - * - * @param _factory_name the factory for which to define the method - * - * @ingroup LinOp - */ -#define GKO_ENABLE_BUILD_METHOD(_factory_name) \ - static auto build()->decltype(_factory_name::create()) \ - { \ - return _factory_name::create(); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - - -#if !(defined(__CUDACC__) || defined(__HIPCC__)) -/** - * Creates a factory parameter in the factory parameters structure. - * - * @param _name name of the parameter - * @param __VA_ARGS__ default value of the parameter - * - * @see GKO_ENABLE_LIN_OP_FACTORY for more details, and usage example - * - * @deprecated Use GKO_FACTORY_PARAMETER_SCALAR or GKO_FACTORY_PARAMETER_VECTOR - * - * @ingroup LinOp - */ -#define GKO_FACTORY_PARAMETER(_name, ...) \ - mutable _name{__VA_ARGS__}; \ - \ - template \ - auto with_##_name(Args&&... 
_value)->std::decay_t& \ - { \ - using type = decltype(this->_name); \ - this->_name = type{std::forward(_value)...}; \ - return *this; \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -/** - * Creates a scalar factory parameter in the factory parameters structure. - * - * Scalar in this context means that the constructor for this type only takes - * a single parameter. - * - * @param _name name of the parameter - * @param _default default value of the parameter - * - * @see GKO_ENABLE_LIN_OP_FACTORY for more details, and usage example - * - * @ingroup LinOp - */ -#define GKO_FACTORY_PARAMETER_SCALAR(_name, _default) \ - GKO_FACTORY_PARAMETER(_name, _default) - -/** - * Creates a vector factory parameter in the factory parameters structure. - * - * Vector in this context means that the constructor for this type takes - * multiple parameters. - * - * @param _name name of the parameter - * @param _default default value of the parameter - * - * @see GKO_ENABLE_LIN_OP_FACTORY for more details, and usage example - * - * @ingroup LinOp - */ -#define GKO_FACTORY_PARAMETER_VECTOR(_name, ...) \ - GKO_FACTORY_PARAMETER(_name, __VA_ARGS__) -#else // defined(__CUDACC__) || defined(__HIPCC__) -// A workaround for the NVCC compiler - parameter pack expansion does not work -// properly, because while the assignment to a scalar value is translated by -// cudafe into a C-style cast, the parameter pack expansion is not removed and -// `Args&&... args` is still kept as a parameter pack. -#define GKO_FACTORY_PARAMETER(_name, ...) \ - mutable _name{__VA_ARGS__}; \ - \ - template \ - auto with_##_name(Args&&... _value)->std::decay_t& \ - { \ - GKO_NOT_IMPLEMENTED; \ - return *this; \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -#define GKO_FACTORY_PARAMETER_SCALAR(_name, _default) \ - mutable _name{_default}; \ - \ - template \ - auto with_##_name(Arg&& _value)->std::decay_t& \ - { \ - using type = decltype(this->_name); \ - this->_name = type{std::forward(_value)}; \ - return *this; \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - -#define GKO_FACTORY_PARAMETER_VECTOR(_name, ...) \ - mutable _name{__VA_ARGS__}; \ - \ - template \ - auto with_##_name(Args&&... _value)->std::decay_t& \ - { \ - using type = decltype(this->_name); \ - this->_name = type{std::forward(_value)...}; \ - return *this; \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") -#endif // defined(__CUDACC__) || defined(__HIPCC__) - - } // namespace gko diff --git a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp index 3347828a55d..fe0539570ee 100644 --- a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp +++ b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp @@ -94,25 +94,15 @@ class Schwarz /** * Local solver factory. 
*/ - std::shared_ptr local_solver{}; - - parameters_type& with_local_solver( - deferred_factory_parameter solver) - { - this->local_solver_generator = std::move(solver); - return *this; - } + GKO_DEFERRED_FACTORY_PARAMETER(local_solver, LinOpFactory); std::unique_ptr on(std::shared_ptr exec) const { auto copy = *this; - copy.local_solver = local_solver_generator.on(exec); + copy.local_solver = local_solver_generator_.on(exec); return copy.enable_parameters_type::on( exec); } - - private: - deferred_factory_parameter local_solver_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Schwarz, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/direct.hpp b/include/ginkgo/core/solver/direct.hpp index f66546cd2ec..dcd6fd189a6 100644 --- a/include/ginkgo/core/solver/direct.hpp +++ b/include/ginkgo/core/solver/direct.hpp @@ -87,36 +87,7 @@ class Direct : public EnableLinOp>, gko::size_type GKO_FACTORY_PARAMETER_SCALAR(num_rhs, 1u); /** The factorization factory to use for generating the factors. */ - std::shared_ptr factorization; - - /** - * - */ - parameters_type& with_factorization( - std::shared_ptr factorization) - { - this->factorization_generator = - [factorization](std::shared_ptr) - -> std::shared_ptr { - return factorization; - }; - return *this; - } - - template < - typename FactorizationParameters, - typename = decltype(std::declval().on( - std::shared_ptr{}))> - parameters_type& with_factorization( - FactorizationParameters factorization_parameters) - { - this->factorization_generator = - [factorization_parameters](std::shared_ptr exec) - -> std::shared_ptr { - return factorization_parameters.on(exec); - }; - return *this; - } + GKO_DEFERRED_FACTORY_PARAMETER(factorization, LinOpFactory); /** * @@ -124,15 +95,10 @@ class Direct : public EnableLinOp>, std::unique_ptr on(std::shared_ptr exec) const { auto parameters_copy = *this; - parameters_copy.factorization = factorization_generator(exec); + parameters_copy.factorization = factorization_generator_.on(exec); return parameters_copy .enable_parameters_type::on(exec); } - - private: - std::function( - std::shared_ptr)> - factorization_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Direct, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/ir.hpp b/include/ginkgo/core/solver/ir.hpp index d30fd9d69bc..1f04c8b75d2 100644 --- a/include/ginkgo/core/solver/ir.hpp +++ b/include/ginkgo/core/solver/ir.hpp @@ -184,13 +184,14 @@ class Ir : public EnableLinOp>, /** * Inner solver factory. */ - std::shared_ptr solver{}; + GKO_DEFERRED_FACTORY_PARAMETER(solver, LinOpFactory); /** * Already generated solver. If one is provided, the factory `solver` * will be ignored. 
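On the caller side, a deferred parameter such as local_solver or factorization accepts either an already generated factory or a builder whose .on(exec) has not been called yet. A minimal sketch, using the Schwarz/Jacobi type aliases that the tests later in this series also use (exec is assumed to be an existing executor):

    // the inner Jacobi factory picks up its executor from the outer .on(exec)
    auto schwarz_factory =
        Schwarz::build().with_local_solver(Jacobi::build()).on(exec);

    // an explicitly pre-generated factory is still accepted as well
    auto jacobi_factory = gko::share(Jacobi::build().on(exec));
    auto schwarz_factory2 =
        Schwarz::build().with_local_solver(jacobi_factory).on(exec);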
*/ - std::shared_ptr generated_solver{}; + std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( + generated_solver, nullptr); /** * Relaxation factor for Richardson iteration @@ -205,41 +206,18 @@ class Ir : public EnableLinOp>, initial_guess_mode GKO_FACTORY_PARAMETER_SCALAR( default_initial_guess, initial_guess_mode::provided); - /** - * - */ - parameters_type& with_solver( - deferred_factory_parameter solver) - { - this->solver_generator = std::move(solver); - return *this; - } - - /** - * - */ - parameters_type& with_generated_solver( - std::shared_ptr generated_solver) - { - this->generated_solver = std::move(generated_solver); - return *this; - } - /** * */ std::unique_ptr on(std::shared_ptr exec) const { auto parameters_copy = *this; - if (solver_generator) { - parameters_copy.solver = solver_generator.on(exec); + if (solver_generator_) { + parameters_copy.solver = solver_generator_.on(exec); } return parameters_copy.enable_iterative_solver_factory_parameters< parameters_type, Factory>::on(exec); } - - private: - deferred_factory_parameter solver_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Ir, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/multigrid.hpp b/include/ginkgo/core/solver/multigrid.hpp index 5aab788f71f..0a0a6fdd191 100644 --- a/include/ginkgo/core/solver/multigrid.hpp +++ b/include/ginkgo/core/solver/multigrid.hpp @@ -225,16 +225,7 @@ class Multigrid : public EnableLinOp, /** * MultigridLevel Factory list */ - std::vector> mg_level{nullptr}; - - template - parameters_type& with_mg_level(Args&&... level) - { - this->mg_level_generator = { - deferred_factory_parameter{ - std::forward(level)}...}; - return *this; - } + GKO_DEFERRED_FACTORY_VECTOR_PARAMETER(mg_level, LinOpFactory); /** * Custom selector size_type (size_type level, const LinOp* fine_matrix) @@ -265,7 +256,6 @@ class Multigrid : public EnableLinOp, std::function GKO_FACTORY_PARAMETER_SCALAR(level_selector, nullptr); - using smoother_list = std::vector>; /** * Pre-smooth Factory list. * Its size must be 0, 1 or be the same as mg_level's. @@ -280,14 +270,14 @@ class Multigrid : public EnableLinOp, * If any element in the vector is a `nullptr` then the smoother * application at the corresponding level is skipped. */ - smoother_list pre_smoother{}; + GKO_DEFERRED_FACTORY_VECTOR_PARAMETER(pre_smoother, LinOpFactory); /** * Post-smooth Factory list. * It is similar to Pre-smooth Factory list. It is ignored if * the factory parameter post_uses_pre is set to true. */ - smoother_list post_smoother{}; + GKO_DEFERRED_FACTORY_VECTOR_PARAMETER(post_smoother, LinOpFactory); /** * Mid-smooth Factory list. If it contains available elements, multigrid @@ -296,34 +286,7 @@ class Multigrid : public EnableLinOp, * Pre-smooth Factory list. It is ignored if the factory parameter * mid_case is not mid. */ - smoother_list mid_smoother{}; - - template - parameters_type& with_pre_smoother(Args&&... smoother) - { - this->pre_smoother_generator = { - deferred_factory_parameter{ - std::forward(smoother)}...}; - return *this; - } - - template - parameters_type& with_post_smoother(Args&&... smoother) - { - this->post_smoother_generator = { - deferred_factory_parameter{ - std::forward(smoother)}...}; - return *this; - } - - template - parameters_type& with_mid_smoother(Args&&... 
smoother) - { - this->mid_smoother_generator = { - deferred_factory_parameter{ - std::forward(smoother)}...}; - return *this; - } + GKO_DEFERRED_FACTORY_VECTOR_PARAMETER(mid_smoother, LinOpFactory); /** * Whether post-smoothing-related calls use corresponding @@ -363,17 +326,7 @@ class Multigrid : public EnableLinOp, * If not set, then a direct LU solver will be used as solver on the * coarsest level. */ - std::vector> coarsest_solver{ - nullptr}; - - template - parameters_type& with_coarsest_solver(Args&&... solver) - { - this->coarsest_solver_generator = { - deferred_factory_parameter{ - std::forward(solver)}...}; - return *this; - } + GKO_DEFERRED_FACTORY_VECTOR_PARAMETER(coarsest_solver, LinOpFactory); /** * Custom coarsest_solver selector @@ -449,36 +402,36 @@ class Multigrid : public EnableLinOp, std::unique_ptr on(std::shared_ptr exec) const { auto copy = *this; - if (!copy.mg_level_generator.empty()) { + if (!copy.mg_level_generator_.empty()) { copy.mg_level.clear(); - for (auto& generator : copy.mg_level_generator) { + for (auto& generator : copy.mg_level_generator_) { copy.mg_level.push_back(generator.on(exec)); } } - if (!copy.pre_smoother_generator.empty()) { + if (!copy.pre_smoother_generator_.empty()) { copy.pre_smoother.clear(); - for (auto& generator : copy.pre_smoother_generator) { + for (auto& generator : copy.pre_smoother_generator_) { copy.pre_smoother.push_back(generator ? generator.on(exec) : nullptr); } } - if (!copy.mid_smoother_generator.empty()) { + if (!copy.mid_smoother_generator_.empty()) { copy.mid_smoother.clear(); - for (auto& generator : copy.mid_smoother_generator) { + for (auto& generator : copy.mid_smoother_generator_) { copy.mid_smoother.push_back(generator ? generator.on(exec) : nullptr); } } - if (!copy.post_smoother_generator.empty()) { + if (!copy.post_smoother_generator_.empty()) { copy.post_smoother.clear(); - for (auto& generator : copy.post_smoother_generator) { + for (auto& generator : copy.post_smoother_generator_) { copy.post_smoother.push_back(generator ? generator.on(exec) : nullptr); } } - if (!copy.coarsest_solver_generator.empty()) { + if (!copy.coarsest_solver_generator_.empty()) { copy.coarsest_solver.clear(); - for (auto& generator : copy.coarsest_solver_generator) { + for (auto& generator : copy.coarsest_solver_generator_) { copy.coarsest_solver.push_back( generator ? 
generator.on(exec) : nullptr); } @@ -486,18 +439,6 @@ class Multigrid : public EnableLinOp, return copy.enable_iterative_solver_factory_parameters< parameters_type, Factory>::on(exec); } - - private: - std::vector> - mg_level_generator; - std::vector> - pre_smoother_generator; - std::vector> - mid_smoother_generator; - std::vector> - post_smoother_generator; - std::vector> - coarsest_solver_generator; }; GKO_ENABLE_LIN_OP_FACTORY(Multigrid, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); From d1be65246f9612661c78e30ed2940b3fc48b1402 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 21 Sep 2023 12:36:58 +0200 Subject: [PATCH 312/583] improve abstract_factory constructors - make them explicit - pass through nullptr explicitly --- include/ginkgo/core/base/abstract_factory.hpp | 16 ++++++++++------ include/ginkgo/core/solver/multigrid.hpp | 12 ++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/ginkgo/core/base/abstract_factory.hpp b/include/ginkgo/core/base/abstract_factory.hpp index e644bcdcd76..ca8ab7ed2ce 100644 --- a/include/ginkgo/core/base/abstract_factory.hpp +++ b/include/ginkgo/core/base/abstract_factory.hpp @@ -288,7 +288,10 @@ class deferred_factory_parameter { deferred_factory_parameter() = default; /** Creates an empty deferred factory parameter. */ - deferred_factory_parameter(std::nullptr_t) {} + explicit deferred_factory_parameter(std::nullptr_t) + { + generator_ = [](std::shared_ptr) { return nullptr; }; + } /** * Creates a deferred factory parameter from a preexisting factory with @@ -298,7 +301,8 @@ class deferred_factory_parameter { std::enable_if_t>::value>* = nullptr> - deferred_factory_parameter(std::shared_ptr factory) + explicit deferred_factory_parameter( + std::shared_ptr factory) { generator_ = [factory = std::shared_ptr(std::move(factory))]( @@ -313,7 +317,7 @@ class deferred_factory_parameter { std::enable_if_t>::value>* = nullptr> - deferred_factory_parameter( + explicit deferred_factory_parameter( std::unique_ptr factory) { generator_ = @@ -329,7 +333,7 @@ class deferred_factory_parameter { template ().on( std::shared_ptr{}))> - deferred_factory_parameter(ParametersType parameters) + explicit deferred_factory_parameter(ParametersType parameters) { generator_ = [parameters](std::shared_ptr exec) -> std::shared_ptr { @@ -341,14 +345,14 @@ class deferred_factory_parameter { std::shared_ptr on( std::shared_ptr exec) const { - if (!(*this)) { + if (this->is_empty()) { GKO_NOT_SUPPORTED(*this); } return generator_(exec); } /** Returns true iff the parameter contains a factory. */ - explicit operator bool() const { return bool(generator_); } + bool is_empty() const { return bool(generator_); } private: std::function( diff --git a/include/ginkgo/core/solver/multigrid.hpp b/include/ginkgo/core/solver/multigrid.hpp index 0a0a6fdd191..1256639acb4 100644 --- a/include/ginkgo/core/solver/multigrid.hpp +++ b/include/ginkgo/core/solver/multigrid.hpp @@ -411,29 +411,25 @@ class Multigrid : public EnableLinOp, if (!copy.pre_smoother_generator_.empty()) { copy.pre_smoother.clear(); for (auto& generator : copy.pre_smoother_generator_) { - copy.pre_smoother.push_back(generator ? generator.on(exec) - : nullptr); + copy.pre_smoother.push_back(generator.on(exec)); } } if (!copy.mid_smoother_generator_.empty()) { copy.mid_smoother.clear(); for (auto& generator : copy.mid_smoother_generator_) { - copy.mid_smoother.push_back(generator ? 
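The distinction this patch draws is between a parameter that was never set and one that was explicitly set to nullptr: the latter now stores a generator that simply produces a null factory, which is why the Multigrid changes in this patch can call generator.on(exec) unconditionally instead of guarding every call with a ternary. A sketch of the construction paths (exec is assumed to exist, Cg is only a stand-in for any factory type, and the explicit markers added here are relaxed again by a later patch in this series):

    using dfp = gko::deferred_factory_parameter<gko::LinOpFactory>;

    dfp unset;                 // no generator stored at all
    dfp null_param{nullptr};   // generator that yields a nullptr factory
    dfp pre_generated{gko::share(gko::solver::Cg<>::build().on(exec))};
    dfp deferred{gko::solver::Cg<>::build()};  // .on(exec) happens later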
generator.on(exec) - : nullptr); + copy.mid_smoother.push_back(generator.on(exec)); } } if (!copy.post_smoother_generator_.empty()) { copy.post_smoother.clear(); for (auto& generator : copy.post_smoother_generator_) { - copy.post_smoother.push_back(generator ? generator.on(exec) - : nullptr); + copy.post_smoother.push_back(generator.on(exec)); } } if (!copy.coarsest_solver_generator_.empty()) { copy.coarsest_solver.clear(); for (auto& generator : copy.coarsest_solver_generator_) { - copy.coarsest_solver.push_back( - generator ? generator.on(exec) : nullptr); + copy.coarsest_solver.push_back(generator.on(exec)); } } return copy.enable_iterative_solver_factory_parameters< From 2ebc888fda6c19785d16787a00be69e8631c6fe4 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 21 Sep 2023 12:42:00 +0200 Subject: [PATCH 313/583] remove more instances of .on --- core/test/log/papi.cpp | 4 +-- .../distributed/preconditioner/schwarz.cpp | 6 ++-- core/test/solver/bicg.cpp | 18 ++++------- core/test/solver/bicgstab.cpp | 16 +++++----- core/test/solver/cb_gmres.cpp | 16 +++++----- core/test/solver/cg.cpp | 18 +++++------ core/test/solver/cgs.cpp | 18 +++++------ core/test/solver/fcg.cpp | 18 +++++------ core/test/solver/gcr.cpp | 22 +++++++------- core/test/solver/gmres.cpp | 22 +++++++------- core/test/solver/idr.cpp | 24 +++++++-------- core/test/solver/ir.cpp | 30 +++++++++---------- core/test/solver/multigrid.cpp | 2 +- reference/test/preconditioner/ilu.cpp | 6 ++-- reference/test/solver/ir_kernels.cpp | 5 ++-- reference/test/solver/multigrid_kernels.cpp | 8 ++--- test/solver/ir_kernels.cpp | 8 ++--- 17 files changed, 115 insertions(+), 126 deletions(-) diff --git a/core/test/log/papi.cpp b/core/test/log/papi.cpp index 2ed266449f6..b4e51cdc31b 100644 --- a/core/test/log/papi.cpp +++ b/core/test/log/papi.cpp @@ -472,7 +472,7 @@ TYPED_TEST(Papi, CatchesLinOpFactoryGenerateStarted) auto factory = gko::solver::Bicgstab::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto str = this->init(gko::log::Logger::linop_factory_generate_started_mask, "linop_factory_generate_started", factory.get()); @@ -493,7 +493,7 @@ TYPED_TEST(Papi, CatchesLinOpFactoryGenerateCompleted) auto factory = gko::solver::Bicgstab::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); TypeParam dummy; auto str = diff --git a/core/test/mpi/distributed/preconditioner/schwarz.cpp b/core/test/mpi/distributed/preconditioner/schwarz.cpp index e0b5749e987..5c354b11748 100644 --- a/core/test/mpi/distributed/preconditioner/schwarz.cpp +++ b/core/test/mpi/distributed/preconditioner/schwarz.cpp @@ -123,9 +123,8 @@ TYPED_TEST(SchwarzFactory, CanBeCopied) using Jacobi = typename TestFixture::Jacobi; using Schwarz = typename TestFixture::Schwarz; using Mtx = typename TestFixture::Mtx; - auto bj = gko::share(Jacobi::build().on(this->exec)); auto copy = Schwarz::build() - .with_local_solver(bj) + .with_local_solver(Jacobi::build()) .on(this->exec) ->generate(Mtx::create(this->exec, MPI_COMM_WORLD)); @@ -141,9 +140,8 @@ TYPED_TEST(SchwarzFactory, CanBeMoved) using Schwarz = typename TestFixture::Schwarz; using Mtx = typename TestFixture::Mtx; auto tmp = clone(this->schwarz); - auto bj = gko::share(Jacobi::build().on(this->exec)); auto copy = Schwarz::build() - .with_local_solver(bj) + .with_local_solver(Jacobi::build()) 
.on(this->exec) ->generate(Mtx::create(this->exec, MPI_COMM_WORLD)); diff --git a/core/test/solver/bicg.cpp b/core/test/solver/bicg.cpp index 37ed110bdf4..9e49b118484 100644 --- a/core/test/solver/bicg.cpp +++ b/core/test/solver/bicg.cpp @@ -194,15 +194,13 @@ TYPED_TEST(Bicg, CanSetPreconditionerInFactory) using Solver = typename TestFixture::Solver; std::shared_ptr bicg_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto bicg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(bicg_precond) .on(this->exec); auto solver = bicg_factory->generate(this->mtx); @@ -245,15 +243,13 @@ TYPED_TEST(Bicg, ThrowsOnWrongPreconditionerInFactory) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr bicg_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto bicg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(bicg_precond) .on(this->exec); @@ -278,15 +274,13 @@ TYPED_TEST(Bicg, CanSetPreconditioner) using Solver = typename TestFixture::Solver; std::shared_ptr bicg_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto bicg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = bicg_factory->generate(this->mtx); solver->set_preconditioner(bicg_precond); diff --git a/core/test/solver/bicgstab.cpp b/core/test/solver/bicgstab.cpp index 937064da7c4..d5b489feff9 100644 --- a/core/test/solver/bicgstab.cpp +++ b/core/test/solver/bicgstab.cpp @@ -160,13 +160,13 @@ TYPED_TEST(Bicgstab, CanSetPreconditionerGenerator) auto bicgstab_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_preconditioner( Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on( this->exec)) - .on(this->exec)) + ) .on(this->exec); auto solver = bicgstab_factory->generate(this->mtx); @@ -208,14 +208,14 @@ TYPED_TEST(Bicgstab, CanSetPreconditionerInFactory) std::shared_ptr bicgstab_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto bicgstab_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(bicgstab_precond) .on(this->exec); auto solver = bicgstab_factory->generate(this->mtx); @@ -235,14 +235,14 @@ TYPED_TEST(Bicgstab, ThrowsOnWrongPreconditionerInFactory) std::shared_ptr bicgstab_precond = Solver::build() .with_criteria( - 
gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto bicgstab_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(bicgstab_precond) .on(this->exec); @@ -268,14 +268,14 @@ TYPED_TEST(Bicgstab, CanSetPreconditioner) std::shared_ptr bicgstab_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto bicgstab_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = bicgstab_factory->generate(this->mtx); solver->set_preconditioner(bicgstab_precond); diff --git a/core/test/solver/cb_gmres.cpp b/core/test/solver/cb_gmres.cpp index 17dcf0c385f..5f6076f248c 100644 --- a/core/test/solver/cb_gmres.cpp +++ b/core/test/solver/cb_gmres.cpp @@ -257,7 +257,7 @@ TYPED_TEST(CbGmres, CanSetKrylovDim) Solver::build() .with_krylov_dim(new_krylov_dim) .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = cb_gmres_factory->generate(this->mtx); @@ -277,7 +277,7 @@ TYPED_TEST(CbGmres, CanUseSetKrylovDim) auto cb_gmres_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = cb_gmres_factory->generate(this->mtx); @@ -296,14 +296,14 @@ TYPED_TEST(CbGmres, CanSetPreconditionerInFactory) std::shared_ptr cb_gmres_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cb_gmres_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cb_gmres_precond) .on(this->exec); auto solver = cb_gmres_factory->generate(this->mtx); @@ -323,14 +323,14 @@ TYPED_TEST(CbGmres, ThrowsOnWrongPreconditionerInFactory) std::shared_ptr cb_gmres_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto cb_gmres_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cb_gmres_precond) .on(this->exec); @@ -344,14 +344,14 @@ TYPED_TEST(CbGmres, CanSetPreconditioner) std::shared_ptr cb_gmres_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cb_gmres_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = cb_gmres_factory->generate(this->mtx); solver->set_preconditioner(cb_gmres_precond); diff --git a/core/test/solver/cg.cpp 
b/core/test/solver/cg.cpp index d0381a6e5ab..d1d7dbee344 100644 --- a/core/test/solver/cg.cpp +++ b/core/test/solver/cg.cpp @@ -164,17 +164,17 @@ TYPED_TEST(Cg, CanSetPreconditionerGenerator) auto cg_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() .with_reduction_factor( gko::remove_complex(1e-6)) - .on(this->exec)) + ) .with_preconditioner( Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on( this->exec)) - .on(this->exec)) + ) .on(this->exec); auto solver = cg_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -194,14 +194,14 @@ TYPED_TEST(Cg, CanSetPreconditionerInFactory) std::shared_ptr cg_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cg_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cg_precond) .on(this->exec); auto solver = cg_factory->generate(this->mtx); @@ -245,14 +245,14 @@ TYPED_TEST(Cg, ThrowsOnWrongPreconditionerInFactory) std::shared_ptr cg_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto cg_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cg_precond) .on(this->exec); @@ -278,14 +278,14 @@ TYPED_TEST(Cg, CanSetPreconditioner) std::shared_ptr cg_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cg_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = cg_factory->generate(this->mtx); solver->set_preconditioner(cg_precond); diff --git a/core/test/solver/cgs.cpp b/core/test/solver/cgs.cpp index 7509c22d76e..705e9f850c8 100644 --- a/core/test/solver/cgs.cpp +++ b/core/test/solver/cgs.cpp @@ -164,17 +164,17 @@ TYPED_TEST(Cgs, CanSetPreconditionerGenerator) auto cgs_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() .with_reduction_factor( gko::remove_complex(1e-6)) - .on(this->exec)) + ) .with_preconditioner( Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on( this->exec)) - .on(this->exec)) + ) .on(this->exec); auto solver = cgs_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -218,14 +218,14 @@ TYPED_TEST(Cgs, CanSetPreconditionerInFactory) std::shared_ptr cgs_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cgs_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + 
gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cgs_precond) .on(this->exec); auto solver = cgs_factory->generate(this->mtx); @@ -245,14 +245,14 @@ TYPED_TEST(Cgs, ThrowsOnWrongPreconditionerInFactory) std::shared_ptr cgs_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto cgs_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cgs_precond) .on(this->exec); @@ -278,14 +278,14 @@ TYPED_TEST(Cgs, CanSetPreconditioner) std::shared_ptr cgs_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cgs_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = cgs_factory->generate(this->mtx); solver->set_preconditioner(cgs_precond); diff --git a/core/test/solver/fcg.cpp b/core/test/solver/fcg.cpp index 21cc686bd01..4ba3f389ecd 100644 --- a/core/test/solver/fcg.cpp +++ b/core/test/solver/fcg.cpp @@ -163,17 +163,17 @@ TYPED_TEST(Fcg, CanSetPreconditionerGenerator) auto fcg_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() .with_reduction_factor( gko::remove_complex(1e-6)) - .on(this->exec)) + ) .with_preconditioner( Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on( this->exec)) - .on(this->exec)) + ) .on(this->exec); auto solver = fcg_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -217,14 +217,14 @@ TYPED_TEST(Fcg, CanSetPreconditionerInFactory) std::shared_ptr fcg_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto fcg_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(fcg_precond) .on(this->exec); auto solver = fcg_factory->generate(this->mtx); @@ -244,14 +244,14 @@ TYPED_TEST(Fcg, ThrowsOnWrongPreconditionerInFactory) std::shared_ptr fcg_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto fcg_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(fcg_precond) .on(this->exec); @@ -277,14 +277,14 @@ TYPED_TEST(Fcg, CanSetPreconditioner) std::shared_ptr fcg_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto fcg_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) 
.on(this->exec); auto solver = fcg_factory->generate(this->mtx); solver->set_preconditioner(fcg_precond); diff --git a/core/test/solver/gcr.cpp b/core/test/solver/gcr.cpp index fec313582ed..554d5aa9526 100644 --- a/core/test/solver/gcr.cpp +++ b/core/test/solver/gcr.cpp @@ -194,16 +194,16 @@ TYPED_TEST(Gcr, CanSetPreconditionerGenerator) auto gcr_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() .with_reduction_factor(TestFixture::reduction_factor) - .on(this->exec)) + ) .with_preconditioner( Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on( this->exec)) - .on(this->exec)) + ) .on(this->exec); auto solver = gcr_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -249,10 +249,10 @@ TYPED_TEST(Gcr, CanSetKrylovDim) Solver::build() .with_krylov_dim(4u) .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(this->exec), + gko::stop::Iteration::build().with_max_iters(4u), gko::stop::ResidualNorm::build() .with_reduction_factor(TestFixture::reduction_factor) - .on(this->exec)) + ) .on(this->exec); auto solver = gcr_factory->generate(this->mtx); auto krylov_dim = solver->get_krylov_dim(); @@ -286,14 +286,14 @@ TYPED_TEST(Gcr, CanSetPreconditionerInFactory) std::shared_ptr gcr_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto gcr_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(gcr_precond) .on(this->exec); auto solver = gcr_factory->generate(this->mtx); @@ -313,14 +313,14 @@ TYPED_TEST(Gcr, ThrowsOnWrongPreconditionerInFactory) std::shared_ptr gcr_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto gcr_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(gcr_precond) .on(this->exec); @@ -346,14 +346,14 @@ TYPED_TEST(Gcr, CanSetPreconditioner) std::shared_ptr gcr_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto gcr_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = gcr_factory->generate(this->mtx); solver->set_preconditioner(gcr_precond); diff --git a/core/test/solver/gmres.cpp b/core/test/solver/gmres.cpp index 8ce8135f8b2..c2d62b3bb45 100644 --- a/core/test/solver/gmres.cpp +++ b/core/test/solver/gmres.cpp @@ -180,16 +180,16 @@ TYPED_TEST(Gmres, CanSetPreconditionerGenerator) auto gmres_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() .with_reduction_factor(TestFixture::reduction_factor) - .on(this->exec)) + ) .with_preconditioner( Solver::build() .with_criteria( 
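All of these test changes follow one pattern: nested stopping-criterion and preconditioner builders no longer carry their own .on(this->exec), since they are now stored as deferred factory parameters and receive the executor of the enclosing .on(exec). Schematically, with Solver standing for the solver typedef of the respective test (for example gko::solver::Cg<>) and exec for its executor:

    // before: every nested builder was bound to the executor explicitly
    auto f_old =
        Solver::build()
            .with_criteria(
                gko::stop::Iteration::build().with_max_iters(3u).on(exec))
            .with_preconditioner(Solver::build().with_criteria(
                gko::stop::Iteration::build().with_max_iters(3u).on(exec)))
            .on(exec);

    // after: the nested builders are passed as-is and are generated with
    // `exec` when the outer factory is created
    auto f_new =
        Solver::build()
            .with_criteria(gko::stop::Iteration::build().with_max_iters(3u))
            .with_preconditioner(Solver::build().with_criteria(
                gko::stop::Iteration::build().with_max_iters(3u)))
            .on(exec);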
gko::stop::Iteration::build().with_max_iters(3u).on( this->exec)) - .on(this->exec)) + ) .on(this->exec); auto solver = gmres_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -236,10 +236,10 @@ TYPED_TEST(Gmres, CanSetKrylovDim) Solver::build() .with_krylov_dim(4u) .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(this->exec), + gko::stop::Iteration::build().with_max_iters(4u), gko::stop::ResidualNorm::build() .with_reduction_factor(TestFixture::reduction_factor) - .on(this->exec)) + ) .on(this->exec); auto solver = gmres_factory->generate(this->mtx); auto krylov_dim = solver->get_krylov_dim(); @@ -273,14 +273,14 @@ TYPED_TEST(Gmres, CanSetPreconditionerInFactory) std::shared_ptr gmres_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto gmres_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(gmres_precond) .on(this->exec); auto solver = gmres_factory->generate(this->mtx); @@ -300,14 +300,14 @@ TYPED_TEST(Gmres, ThrowsOnWrongPreconditionerInFactory) std::shared_ptr gmres_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto gmres_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(gmres_precond) .on(this->exec); @@ -333,14 +333,14 @@ TYPED_TEST(Gmres, CanSetPreconditioner) std::shared_ptr gmres_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto gmres_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = gmres_factory->generate(this->mtx); solver->set_preconditioner(gmres_precond); diff --git a/core/test/solver/idr.cpp b/core/test/solver/idr.cpp index e2657be8581..a93978fa335 100644 --- a/core/test/solver/idr.cpp +++ b/core/test/solver/idr.cpp @@ -162,13 +162,13 @@ TYPED_TEST(Idr, CanSetPreconditionerGenerator) auto idr_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_preconditioner( Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on( this->exec)) - .on(this->exec)) + ) .on(this->exec); auto solver = idr_factory->generate(this->mtx); @@ -209,14 +209,14 @@ TYPED_TEST(Idr, CanSetPreconditionerInFactory) std::shared_ptr idr_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto idr_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(idr_precond) .on(this->exec); auto solver = idr_factory->generate(this->mtx); @@ -236,14 +236,14 @@ TYPED_TEST(Idr, 
ThrowsOnWrongPreconditionerInFactory) std::shared_ptr idr_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto idr_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(idr_precond) .on(this->exec); @@ -257,14 +257,14 @@ TYPED_TEST(Idr, CanSetPreconditioner) std::shared_ptr idr_precond = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto idr_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = idr_factory->generate(this->mtx); solver->set_preconditioner(idr_precond); @@ -283,7 +283,7 @@ TYPED_TEST(Idr, CanSetSubspaceDim) Solver::build() .with_subspace_dim(8u) .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = idr_factory->generate(this->mtx); auto subspace_dim = solver->get_subspace_dim(); @@ -320,7 +320,7 @@ TYPED_TEST(Idr, CanSetKappa) Solver::build() .with_kappa(real_type{0.05}) .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = idr_factory->generate(this->mtx); auto kappa = solver->get_kappa(); @@ -359,7 +359,7 @@ TYPED_TEST(Idr, CanSetDeterministic) Solver::build() .with_deterministic(true) .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = idr_factory->generate(this->mtx); auto deterministic = solver->get_deterministic(); @@ -396,7 +396,7 @@ TYPED_TEST(Idr, CanSetComplexSubspace) Solver::build() .with_complex_subspace(true) .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = idr_factory->generate(this->mtx); auto complex_subspace = solver->get_complex_subspace(); diff --git a/core/test/solver/ir.cpp b/core/test/solver/ir.cpp index 7419f99bfd0..93ea3e89b10 100644 --- a/core/test/solver/ir.cpp +++ b/core/test/solver/ir.cpp @@ -163,16 +163,16 @@ TYPED_TEST(Ir, CanSetInnerSolverInFactory) auto ir_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() .with_reduction_factor(r::value) - .on(this->exec)) + ) .with_solver( Solver::build() .with_criteria( gko::stop::Iteration::build().with_max_iters(3u).on( this->exec)) - .on(this->exec)) + ) .on(this->exec); auto solver = ir_factory->generate(this->mtx); auto inner_solver = dynamic_cast( @@ -190,14 +190,14 @@ TYPED_TEST(Ir, CanSetGeneratedInnerSolverInFactory) std::shared_ptr ir_solver = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto ir_factory = Solver::build() .with_criteria( - 
gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_solver(ir_solver) .on(this->exec); auto solver = ir_factory->generate(this->mtx); @@ -241,14 +241,14 @@ TYPED_TEST(Ir, ThrowsOnWrongInnerSolverInFactory) std::shared_ptr ir_solver = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto ir_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_solver(ir_solver) .on(this->exec); @@ -262,14 +262,14 @@ TYPED_TEST(Ir, CanSetInnerSolver) std::shared_ptr ir_solver = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto ir_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = ir_factory->generate(this->mtx); solver->set_solver(ir_solver); @@ -311,14 +311,14 @@ TYPED_TEST(Ir, ThrowOnWrongInnerSolverSet) std::shared_ptr ir_solver = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto ir_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = ir_factory->generate(this->mtx); @@ -346,10 +346,10 @@ TYPED_TEST(Ir, DefaultRelaxationFactor) auto richardson = gko::solver::Richardson::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() .with_reduction_factor(r::value) - .on(this->exec)) + ) .on(this->exec) ->generate(this->mtx); @@ -365,10 +365,10 @@ TYPED_TEST(Ir, UseAsRichardson) auto richardson = gko::solver::Richardson::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), + gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() .with_reduction_factor(r::value) - .on(this->exec)) + ) .with_relaxation_factor(relaxation_factor) .on(this->exec) ->generate(this->mtx); diff --git a/core/test/solver/multigrid.cpp b/core/test/solver/multigrid.cpp index 8fea85a40bb..e9d6b332aac 100644 --- a/core/test/solver/multigrid.cpp +++ b/core/test/solver/multigrid.cpp @@ -287,7 +287,7 @@ TYPED_TEST(Multigrid, ApplyUsesInitialGuessReturnsFalseWhenZeroGuess) auto multigrid_factory = Solver::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(3u)) .with_max_levels(2u) .with_coarsest_solver(this->lo_factory) .with_pre_smoother(this->lo_factory) diff --git a/reference/test/preconditioner/ilu.cpp b/reference/test/preconditioner/ilu.cpp index 92fe8fac8cf..22c9929219e 100644 --- a/reference/test/preconditioner/ilu.cpp +++ b/reference/test/preconditioner/ilu.cpp @@ -615,8 +615,8 @@ TEST_F(DefaultIlu, CanBeUsedAsPreconditioner) auto solver = gko::solver::Bicgstab<>::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(this->exec)) - 
.with_preconditioner(default_ilu_prec_type::build().on(this->exec)) + gko::stop::Iteration::build().with_max_iters(2u)) + .with_preconditioner(default_ilu_prec_type::build()) .on(this->exec) ->generate(this->mtx); auto x = Mtx::create(this->exec, gko::dim<2>{3, 1}); @@ -636,7 +636,7 @@ TEST_F(DefaultIlu, CanBeUsedAsGeneratedPreconditioner) auto solver = gko::solver::Bicgstab<>::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(2u)) .with_generated_preconditioner(precond) .on(this->exec) ->generate(this->mtx); diff --git a/reference/test/solver/ir_kernels.cpp b/reference/test/solver/ir_kernels.cpp index fc0c130aa83..4fae1bfdac8 100644 --- a/reference/test/solver/ir_kernels.cpp +++ b/reference/test/solver/ir_kernels.cpp @@ -455,7 +455,7 @@ TYPED_TEST(Ir, ApplyWithGivenInitialGuessModeIsEquivalentToRef) auto ref_solver = gko::solver::Ir::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(1u)) .on(this->exec) ->generate(this->mtx); auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); @@ -464,8 +464,7 @@ TYPED_TEST(Ir, ApplyWithGivenInitialGuessModeIsEquivalentToRef) auto solver = gko::solver::Ir::build() .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - this->exec)) + gko::stop::Iteration::build().with_max_iters(1u)) .with_default_initial_guess(guess) .on(this->exec) ->generate(this->mtx); diff --git a/reference/test/solver/multigrid_kernels.cpp b/reference/test/solver/multigrid_kernels.cpp index 23307d20b33..86be56ce3cb 100644 --- a/reference/test/solver/multigrid_kernels.cpp +++ b/reference/test/solver/multigrid_kernels.cpp @@ -406,8 +406,7 @@ class Multigrid : public ::testing::Test { .with_post_uses_pre(false) .with_mid_case(mid_case) .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - this->exec)) + gko::stop::Iteration::build().with_max_iters(1u)) .with_cycle(cycle) .with_min_coarse_rows(1u) .on(this->exec)); @@ -428,8 +427,7 @@ class Multigrid : public ::testing::Test { .with_post_uses_pre(true) .with_mid_case(mid_case) .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on( - this->exec)) + gko::stop::Iteration::build().with_max_iters(1u)) .with_cycle(cycle) .with_min_coarse_rows(1u) .on(this->exec)); @@ -1266,7 +1264,7 @@ TYPED_TEST(Multigrid, ZeroGuessIgnoresInput) .with_max_levels(2u) .with_mg_level(this->coarse_factory) .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)) + gko::stop::Iteration::build().with_max_iters(1u)) .with_min_coarse_rows(1u); auto normal_mg = common_part .with_default_initial_guess( diff --git a/test/solver/ir_kernels.cpp b/test/solver/ir_kernels.cpp index 81464036c69..9374b7867ce 100644 --- a/test/solver/ir_kernels.cpp +++ b/test/solver/ir_kernels.cpp @@ -133,13 +133,13 @@ TEST_F(Ir, ApplyWithIterativeInnerSolverIsEquivalentToRef) auto ir_factory = gko::solver::Ir::build() .with_solver(gko::solver::Gmres::build().with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(ref))) + gko::stop::Iteration::build().with_max_iters(1u))) .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .on(ref); auto d_ir_factory = gko::solver::Ir::build() .with_solver(gko::solver::Gmres::build().with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec))) + gko::stop::Iteration::build().with_max_iters(1u))) .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) 
.on(exec); auto solver = ir_factory->generate(std::move(mtx)); @@ -197,14 +197,14 @@ TEST_F(Ir, RichardsonApplyWithIterativeInnerSolverIsEquivalentToRef) auto ir_factory = gko::solver::Ir::build() .with_solver(gko::solver::Gmres::build().with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(ref))) + gko::stop::Iteration::build().with_max_iters(1u))) .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .with_relaxation_factor(value_type{0.9}) .on(ref); auto d_ir_factory = gko::solver::Ir::build() .with_solver(gko::solver::Gmres::build().with_criteria( - gko::stop::Iteration::build().with_max_iters(1u).on(exec))) + gko::stop::Iteration::build().with_max_iters(1u))) .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .with_relaxation_factor(value_type{0.9}) .on(exec); From fb941081c6b1b9aaf5713b9434e0e7a5f370ef06 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 25 Sep 2023 15:10:27 +0200 Subject: [PATCH 314/583] add .gitignore to build folders automatically --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ac16267717..4dbce4a29c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -511,3 +511,8 @@ else() FILE(READ ${PROJECT_BINARY_DIR}/minimal.log GINKGO_LOG_SUMMARY) endif() MESSAGE(STATUS "${GINKGO_LOG_SUMMARY}") + +# make sure no build files get committed accidentally +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/.gitignore) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/.gitignore "*") +endif() From e606c36a1254418ed70f723b346dad8730d4ac31 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 25 Sep 2023 17:15:46 +0200 Subject: [PATCH 315/583] handle deferred factory generation by registration Co-authored-by: Marcel Koch --- core/test/solver/multigrid.cpp | 30 ++++--- include/ginkgo/core/base/abstract_factory.hpp | 86 +++++++++++++++++-- include/ginkgo/core/base/lin_op.hpp | 20 ----- include/ginkgo/core/base/std_extensions.hpp | 10 +++ .../distributed/preconditioner/schwarz.hpp | 8 -- include/ginkgo/core/preconditioner/ic.hpp | 31 +++---- include/ginkgo/core/preconditioner/ilu.hpp | 41 ++++----- include/ginkgo/core/solver/direct.hpp | 11 --- include/ginkgo/core/solver/ir.hpp | 13 --- include/ginkgo/core/solver/multigrid.hpp | 37 -------- include/ginkgo/core/solver/solver_base.hpp | 64 +++++++------- 11 files changed, 169 insertions(+), 182 deletions(-) diff --git a/core/test/solver/multigrid.cpp b/core/test/solver/multigrid.cpp index e9d6b332aac..9f7bddb633c 100644 --- a/core/test/solver/multigrid.cpp +++ b/core/test/solver/multigrid.cpp @@ -286,8 +286,7 @@ TYPED_TEST(Multigrid, ApplyUsesInitialGuessReturnsFalseWhenZeroGuess) using Solver = typename TestFixture::Solver; auto multigrid_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_max_levels(2u) .with_coarsest_solver(this->lo_factory) .with_pre_smoother(this->lo_factory) @@ -426,25 +425,28 @@ TYPED_TEST(Multigrid, ThrowWhenNullMgLevel) TYPED_TEST(Multigrid, ThrowWhenMgLevelContainsNullptr) { using Solver = typename TestFixture::Solver; - auto factory_parameters = Solver::build() - .with_max_levels(1u) - .with_min_coarse_rows(2u) - .with_criteria(this->criterion) - .with_mg_level(this->rp_factory, nullptr); + auto factory = Solver::build() + .with_max_levels(1u) + .with_min_coarse_rows(2u) + .with_criteria(this->criterion) + .with_mg_level(this->rp_factory, nullptr) + .on(this->exec); - 
ASSERT_THROW(factory_parameters.on(this->exec), gko::NotSupported); + ASSERT_THROW(factory->generate(this->mtx), gko::NotSupported); } TYPED_TEST(Multigrid, ThrowWhenEmptyMgLevelList) { using Solver = typename TestFixture::Solver; - auto factory = Solver::build() - .with_max_levels(1u) - .with_min_coarse_rows(2u) - .with_mg_level() - .with_criteria(this->criterion) - .on(this->exec); + auto factory = + Solver::build() + .with_max_levels(1u) + .with_min_coarse_rows(2u) + .with_mg_level( + std::vector>{}) + .with_criteria(this->criterion) + .on(this->exec); ASSERT_THROW(factory->generate(this->mtx), gko::NotSupported); } diff --git a/include/ginkgo/core/base/abstract_factory.hpp b/include/ginkgo/core/base/abstract_factory.hpp index ca8ab7ed2ce..341340a5db2 100644 --- a/include/ginkgo/core/base/abstract_factory.hpp +++ b/include/ginkgo/core/base/abstract_factory.hpp @@ -34,6 +34,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_PUBLIC_CORE_BASE_ABSTRACT_FACTORY_HPP_ +#include + + #include @@ -257,7 +260,11 @@ class enable_parameters_type { */ std::unique_ptr on(std::shared_ptr exec) const { - auto factory = std::unique_ptr(new Factory(exec, *self())); + ConcreteParametersType copy = *self(); + for (const auto& item : deferred_factories) { + item.second(exec, copy); + } + auto factory = std::unique_ptr(new Factory(exec, copy)); for (auto& logger : loggers) { factory->add_logger(logger); }; @@ -271,9 +278,35 @@ class enable_parameters_type { * Loggers to be attached to the factory and generated object. */ std::vector> loggers{}; + + std::unordered_map exec, + ConcreteParametersType&)>> + deferred_factories; }; +/** + * This Macro will generate a new type containing the parameters for the factory + * `_factory_name`. For more details, see #GKO_ENABLE_LIN_OP_FACTORY(). + * It is required to use this macro **before** calling the + * macro #GKO_ENABLE_LIN_OP_FACTORY(). + * It is also required to use the same names for all parameters between both + * macros. + * + * @param _parameters_name name of the parameters member in the class + * @param _factory_name name of the generated factory type + * + * @ingroup LinOp + */ +#define GKO_CREATE_FACTORY_PARAMETERS(_parameters_name, _factory_name) \ +public: \ + class _factory_name; \ + struct _parameters_name##_type \ + : public ::gko::enable_parameters_type<_parameters_name##_type, \ + _factory_name> + + /** * Represents a factory parameter of factory type that can either initialized by * a pre-existing factory or by passing in a factory_parameters object whose @@ -288,7 +321,7 @@ class deferred_factory_parameter { deferred_factory_parameter() = default; /** Creates an empty deferred factory parameter. 
*/ - explicit deferred_factory_parameter(std::nullptr_t) + deferred_factory_parameter(std::nullptr_t) { generator_ = [](std::shared_ptr) { return nullptr; }; } @@ -301,8 +334,7 @@ class deferred_factory_parameter { std::enable_if_t>::value>* = nullptr> - explicit deferred_factory_parameter( - std::shared_ptr factory) + deferred_factory_parameter(std::shared_ptr factory) { generator_ = [factory = std::shared_ptr(std::move(factory))]( @@ -317,7 +349,7 @@ class deferred_factory_parameter { std::enable_if_t>::value>* = nullptr> - explicit deferred_factory_parameter( + deferred_factory_parameter( std::unique_ptr factory) { generator_ = @@ -333,7 +365,7 @@ class deferred_factory_parameter { template ().on( std::shared_ptr{}))> - explicit deferred_factory_parameter(ParametersType parameters) + deferred_factory_parameter(ParametersType parameters) { generator_ = [parameters](std::shared_ptr exec) -> std::shared_ptr { @@ -351,8 +383,8 @@ class deferred_factory_parameter { return generator_(exec); } - /** Returns true iff the parameter contains a factory. */ - bool is_empty() const { return bool(generator_); } + /** Returns true iff the parameter is empty. */ + bool is_empty() const { return !bool(generator_); } private: std::function( @@ -499,6 +531,12 @@ public: \ parameters_type& with_##_name(deferred_factory_parameter<_type> factory) \ { \ this->_name##_generator_ = std::move(factory); \ + this->deferred_factories[#_name] = [](const auto& exec, \ + auto& params) { \ + if (!params._name##_generator_.is_empty()) { \ + params._name = params._name##_generator_.on(exec); \ + } \ + }; \ return *this; \ } \ \ @@ -523,11 +561,41 @@ public: \ #define GKO_DEFERRED_FACTORY_VECTOR_PARAMETER(_name, _type) \ public: \ std::vector> _name{}; \ - template \ + template >...>::value>> \ parameters_type& with_##_name(Args&&... factories) \ { \ this->_name##_generator_ = {deferred_factory_parameter<_type>{ \ std::forward(factories)}...}; \ + this->deferred_factories[#_name] = [](const auto& exec, \ + auto& params) { \ + if (!params._name##_generator_.empty()) { \ + params._name.clear(); \ + for (auto& generator : params._name##_generator_) { \ + params._name.push_back(generator.on(exec)); \ + } \ + } \ + }; \ + return *this; \ + } \ + template \ + parameters_type& with_##_name(const std::vector& factories) \ + { \ + this->_name##_generator_.clear(); \ + for (const auto& factory : factories) { \ + this->_name##_generator_.push_back(factory); \ + } \ + this->deferred_factories[#_name] = [](const auto& exec, \ + auto& params) { \ + if (!params._name##_generator_.empty()) { \ + params._name.clear(); \ + for (auto& generator : params._name##_generator_) { \ + params._name.push_back(generator.on(exec)); \ + } \ + } \ + }; \ return *this; \ } \ \ diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index e2660baff2e..407fafda0d1 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -949,26 +949,6 @@ using EnableDefaultLinOpFactory = EnableDefaultFactory; -/** - * This Macro will generate a new type containing the parameters for the factory - * `_factory_name`. For more details, see #GKO_ENABLE_LIN_OP_FACTORY(). - * It is required to use this macro **before** calling the - * macro #GKO_ENABLE_LIN_OP_FACTORY(). - * It is also required to use the same names for all parameters between both - * macros. 
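The vector variant of the macro accepts either a list of individual entries, each convertible to a deferred_factory_parameter (nullptr entries included), or a single std::vector of factories, which is what the updated Multigrid test above relies on. A sketch (exec, smoother_factory and mg_level_factory are assumed to be an existing executor and existing factory builders or generated factories):

    auto mg_factory =
        gko::solver::Multigrid::build()
            // variadic form: one deferred entry per level, nullptr allowed
            .with_pre_smoother(smoother_factory, nullptr)
            // vector form: pass an explicit (possibly empty) list
            .with_mg_level(
                std::vector<std::shared_ptr<const gko::LinOpFactory>>{
                    mg_level_factory})
            .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
            .with_min_coarse_rows(1u)
            .on(exec);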
- * - * @param _parameters_name name of the parameters member in the class - * @param _factory_name name of the generated factory type - * - * @ingroup LinOp - */ -#define GKO_CREATE_FACTORY_PARAMETERS(_parameters_name, _factory_name) \ -public: \ - class _factory_name; \ - struct _parameters_name##_type \ - : public ::gko::enable_parameters_type<_parameters_name##_type, \ - _factory_name> - /** * This macro will generate a default implementation of a LinOpFactory for the diff --git a/include/ginkgo/core/base/std_extensions.hpp b/include/ginkgo/core/base/std_extensions.hpp index 69629f98e06..1064ae464f0 100644 --- a/include/ginkgo/core/base/std_extensions.hpp +++ b/include/ginkgo/core/base/std_extensions.hpp @@ -128,6 +128,16 @@ constexpr bool less_equal(const T&& lhs, const T&& rhs) } +// available in with C++17 +template +struct conjunction : std::true_type {}; +template +struct conjunction : B1 {}; +template +struct conjunction + : std::conditional_t, B1> {}; + + } // namespace xstd } // namespace gko diff --git a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp index fe0539570ee..f31bd96aa2e 100644 --- a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp +++ b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp @@ -95,14 +95,6 @@ class Schwarz * Local solver factory. */ GKO_DEFERRED_FACTORY_PARAMETER(local_solver, LinOpFactory); - - std::unique_ptr on(std::shared_ptr exec) const - { - auto copy = *this; - copy.local_solver = local_solver_generator_.on(exec); - return copy.enable_parameters_type::on( - exec); - } }; GKO_ENABLE_LIN_OP_FACTORY(Schwarz, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/preconditioner/ic.hpp b/include/ginkgo/core/preconditioner/ic.hpp index ed5063d403b..97e7fe37871 100644 --- a/include/ginkgo/core/preconditioner/ic.hpp +++ b/include/ginkgo/core/preconditioner/ic.hpp @@ -145,6 +145,13 @@ class Ic : public EnableLinOp>, public Transposable { deferred_factory_parameter solver) { this->l_solver_generator = std::move(solver); + this->deferred_factories["l_solver"] = [](const auto& exec, + auto& params) { + if (!params.l_solver_generator.is_empty()) { + params.l_solver_factory = + params.l_solver_generator.on(exec); + } + }; return *this; } @@ -159,26 +166,16 @@ class Ic : public EnableLinOp>, public Transposable { deferred_factory_parameter factorization) { this->factorization_generator = std::move(factorization); + this->deferred_factories["factorization"] = [](const auto& exec, + auto& params) { + if (!params.factorization_generator.is_empty()) { + params.factorization_factory = + params.factorization_generator.on(exec); + } + }; return *this; } - /** - * - */ - std::unique_ptr on(std::shared_ptr exec) const - { - auto parameters_copy = *this; - if (l_solver_generator) { - parameters_copy.l_solver_factory = l_solver_generator.on(exec); - } - if (factorization_generator) { - parameters_copy.factorization_factory = - factorization_generator.on(exec); - } - return parameters_copy - .enable_parameters_type::on(exec); - } - private: deferred_factory_parameter l_solver_generator; diff --git a/include/ginkgo/core/preconditioner/ilu.hpp b/include/ginkgo/core/preconditioner/ilu.hpp index f4f8d0abd5b..d0f32c18c8c 100644 --- a/include/ginkgo/core/preconditioner/ilu.hpp +++ b/include/ginkgo/core/preconditioner/ilu.hpp @@ -163,6 +163,13 @@ class Ilu : public EnableLinOp< deferred_factory_parameter solver) { this->l_solver_generator = 
std::move(solver); + this->deferred_factories["l_solver"] = [](const auto& exec, + auto& params) { + if (!params.l_solver_generator.is_empty()) { + params.l_solver_factory = + params.l_solver_generator.on(exec); + } + }; return *this; } @@ -177,6 +184,13 @@ class Ilu : public EnableLinOp< deferred_factory_parameter solver) { this->u_solver_generator = std::move(solver); + this->deferred_factories["u_solver"] = [](const auto& exec, + auto& params) { + if (!params.u_solver_generator.is_empty()) { + params.u_solver_factory = + params.u_solver_generator.on(exec); + } + }; return *this; } @@ -191,29 +205,16 @@ class Ilu : public EnableLinOp< deferred_factory_parameter factorization) { this->factorization_generator = std::move(factorization); + this->deferred_factories["factorization"] = [](const auto& exec, + auto& params) { + if (!params.factorization_generator.is_empty()) { + params.factorization_factory = + params.factorization_generator.on(exec); + } + }; return *this; } - /** - * - */ - std::unique_ptr on(std::shared_ptr exec) const - { - auto parameters_copy = *this; - if (l_solver_generator) { - parameters_copy.l_solver_factory = l_solver_generator.on(exec); - } - if (u_solver_generator) { - parameters_copy.u_solver_factory = u_solver_generator.on(exec); - } - if (factorization_generator) { - parameters_copy.factorization_factory = - factorization_generator.on(exec); - } - return parameters_copy - .enable_parameters_type::on(exec); - } - private: deferred_factory_parameter l_solver_generator; diff --git a/include/ginkgo/core/solver/direct.hpp b/include/ginkgo/core/solver/direct.hpp index dcd6fd189a6..ee6783ff96d 100644 --- a/include/ginkgo/core/solver/direct.hpp +++ b/include/ginkgo/core/solver/direct.hpp @@ -88,17 +88,6 @@ class Direct : public EnableLinOp>, /** The factorization factory to use for generating the factors. 
*/ GKO_DEFERRED_FACTORY_PARAMETER(factorization, LinOpFactory); - - /** - * - */ - std::unique_ptr on(std::shared_ptr exec) const - { - auto parameters_copy = *this; - parameters_copy.factorization = factorization_generator_.on(exec); - return parameters_copy - .enable_parameters_type::on(exec); - } }; GKO_ENABLE_LIN_OP_FACTORY(Direct, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/ir.hpp b/include/ginkgo/core/solver/ir.hpp index 1f04c8b75d2..468e539f487 100644 --- a/include/ginkgo/core/solver/ir.hpp +++ b/include/ginkgo/core/solver/ir.hpp @@ -205,19 +205,6 @@ class Ir : public EnableLinOp>, */ initial_guess_mode GKO_FACTORY_PARAMETER_SCALAR( default_initial_guess, initial_guess_mode::provided); - - /** - * - */ - std::unique_ptr on(std::shared_ptr exec) const - { - auto parameters_copy = *this; - if (solver_generator_) { - parameters_copy.solver = solver_generator_.on(exec); - } - return parameters_copy.enable_iterative_solver_factory_parameters< - parameters_type, Factory>::on(exec); - } }; GKO_ENABLE_LIN_OP_FACTORY(Ir, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/multigrid.hpp b/include/ginkgo/core/solver/multigrid.hpp index 1256639acb4..21860844d3e 100644 --- a/include/ginkgo/core/solver/multigrid.hpp +++ b/include/ginkgo/core/solver/multigrid.hpp @@ -398,43 +398,6 @@ class Multigrid : public EnableLinOp, */ initial_guess_mode GKO_FACTORY_PARAMETER_SCALAR( default_initial_guess, initial_guess_mode::zero); - - std::unique_ptr on(std::shared_ptr exec) const - { - auto copy = *this; - if (!copy.mg_level_generator_.empty()) { - copy.mg_level.clear(); - for (auto& generator : copy.mg_level_generator_) { - copy.mg_level.push_back(generator.on(exec)); - } - } - if (!copy.pre_smoother_generator_.empty()) { - copy.pre_smoother.clear(); - for (auto& generator : copy.pre_smoother_generator_) { - copy.pre_smoother.push_back(generator.on(exec)); - } - } - if (!copy.mid_smoother_generator_.empty()) { - copy.mid_smoother.clear(); - for (auto& generator : copy.mid_smoother_generator_) { - copy.mid_smoother.push_back(generator.on(exec)); - } - } - if (!copy.post_smoother_generator_.empty()) { - copy.post_smoother.clear(); - for (auto& generator : copy.post_smoother_generator_) { - copy.post_smoother.push_back(generator.on(exec)); - } - } - if (!copy.coarsest_solver_generator_.empty()) { - copy.coarsest_solver.clear(); - for (auto& generator : copy.coarsest_solver_generator_) { - copy.coarsest_solver.push_back(generator.on(exec)); - } - } - return copy.enable_iterative_solver_factory_parameters< - parameters_type, Factory>::on(exec); - } }; GKO_ENABLE_LIN_OP_FACTORY(Multigrid, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/include/ginkgo/core/solver/solver_base.hpp b/include/ginkgo/core/solver/solver_base.hpp index f9132426c61..3888d7fe62d 100644 --- a/include/ginkgo/core/solver/solver_base.hpp +++ b/include/ginkgo/core/solver/solver_base.hpp @@ -861,6 +861,10 @@ class EnablePreconditionedIterativeSolver }; +/** + * The parameter type shared between all iterative solvers. + * @see GKO_CREATE_FACTORY_PARAMETERS + */ struct iterative_solver_factory_parameters { /** * Stopping criteria to be used by the solver. 
@@ -883,27 +887,18 @@ struct enable_iterative_solver_factory_parameters this->criterion_generators = { deferred_factory_parameter{ std::forward(value)}...}; + this->deferred_factories["criteria"] = [](const auto& exec, + auto& params) { + if (!params.criterion_generators.empty()) { + params.criteria.clear(); + for (auto& generator : params.criterion_generators) { + params.criteria.push_back(generator.on(exec)); + } + } + }; return *self(); } - /** - * @copydoc enable_solver_factory_parameters::on - * - * @note This variant instantiates stopping criteria that were provided - * without calling `.on(exec)` before generating the factory. - */ - std::unique_ptr on(std::shared_ptr exec) const - { - auto copy = *self(); - copy.criteria.clear(); - for (auto& generator : criterion_generators) { - copy.criteria.push_back(generator.on(exec)); - } - auto factory = - copy.enable_parameters_type::on(exec); - return factory; - } - private: GKO_ENABLE_SELF(Parameters); @@ -912,6 +907,11 @@ struct enable_iterative_solver_factory_parameters }; +/** + * The parameter type shared between all preconditioned iterative solvers, + * excluding the parameters available in iterative_solver_factory_parameters. + * @see GKO_CREATE_FACTORY_PARAMETERS + */ struct preconditioned_iterative_solver_factory_parameters { /** * The preconditioner to be used by the iterative solver. By default, no @@ -932,17 +932,28 @@ struct enable_preconditioned_iterative_solver_factory_parameters : enable_iterative_solver_factory_parameters, preconditioned_iterative_solver_factory_parameters { /** - * + * Provides a preconditioner factory to be used by the iterative solver in a + * fluent interface. + * @see preconditioned_iterative_solver_factory_parameters::preconditioner */ Parameters& with_preconditioner( deferred_factory_parameter preconditioner) { this->preconditioner_generator = std::move(preconditioner); + this->deferred_factories["preconditioner"] = [](const auto& exec, + auto& params) { + if (!params.preconditioner_generator.is_empty()) { + params.preconditioner = + params.preconditioner_generator.on(exec); + } + }; return *self(); } /** - * + * Provides a concrete preconditioner to be used by the iterative solver in + * a fluent interface. 
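Taken together, these setters are what let user code drop the nested .on(exec) calls while keeping the old explicit-factory form working. A hedged usage sketch, with CG and the iteration criterion chosen only as examples mirroring the tests updated further below:

#include <ginkgo/ginkgo.h>

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    using cg = gko::solver::Cg<double>;

    // Old style: every nested factory is bound to the executor explicitly.
    auto verbose =
        cg::build()
            .with_criteria(
                gko::stop::Iteration::build().with_max_iters(100u).on(exec))
            .with_preconditioner(
                cg::build()
                    .with_criteria(
                        gko::stop::Iteration::build().with_max_iters(3u).on(exec))
                    .on(exec))
            .on(exec);

    // New style: the outer .on(exec) propagates the executor to all deferred
    // criteria and preconditioner parameters.
    auto deferred =
        cg::build()
            .with_criteria(gko::stop::Iteration::build().with_max_iters(100u))
            .with_preconditioner(cg::build().with_criteria(
                gko::stop::Iteration::build().with_max_iters(3u)))
            .on(exec);
}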
+ * @see preconditioned_iterative_solver_factory_parameters::preconditioner */ Parameters& with_generated_preconditioner( std::shared_ptr generated_preconditioner) @@ -951,19 +962,6 @@ struct enable_preconditioned_iterative_solver_factory_parameters return *self(); } - /** - * - */ - std::unique_ptr on(std::shared_ptr exec) const - { - auto parameters_copy = *self(); - if (preconditioner_generator) { - parameters_copy.preconditioner = preconditioner_generator.on(exec); - } - return parameters_copy.enable_iterative_solver_factory_parameters< - Parameters, Factory>::on(exec); - } - private: GKO_ENABLE_SELF(Parameters); From 6fb2e4ad0564161cb64d3ded7a3143e87d27e9b3 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 25 Sep 2023 18:06:02 +0200 Subject: [PATCH 316/583] add missing documentation Co-authored-by: Marcel Koch --- include/ginkgo/core/base/abstract_factory.hpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/include/ginkgo/core/base/abstract_factory.hpp b/include/ginkgo/core/base/abstract_factory.hpp index 341340a5db2..cca440afe6c 100644 --- a/include/ginkgo/core/base/abstract_factory.hpp +++ b/include/ginkgo/core/base/abstract_factory.hpp @@ -279,6 +279,12 @@ class enable_parameters_type { */ std::vector> loggers{}; + /** + * Deferred factory parameter initialization functions that will be called + * in on(). Their names usually correspond to the variable names in the + * parameter type. They will be provided the executor and the parameter + * object currently being initialized from the generators. + */ std::unordered_map exec, ConcreteParametersType&)>> @@ -318,9 +324,10 @@ public: \ template class deferred_factory_parameter { public: + /** Creates an empty deferred factory parameter. */ deferred_factory_parameter() = default; - /** Creates an empty deferred factory parameter. */ + /** Creates a deferred factory parameter returning a nullptr. */ deferred_factory_parameter(std::nullptr_t) { generator_ = [](std::shared_ptr) { return nullptr; }; @@ -373,7 +380,10 @@ class deferred_factory_parameter { }; } - /** Instantiates the deferred parameter into an actual factory. */ + /** + * Instantiates the deferred parameter into an actual factory. This will + * throw if the deferred factory parameter is empty. + */ std::shared_ptr on( std::shared_ptr exec) const { From d752e1b48671b6b6bcfd54ab6fd6d679cc1ca778 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 6 Oct 2023 18:07:15 +0200 Subject: [PATCH 317/583] review updates - remove additional .on(...) 
calls - add tests for old functionality - add assertions for dynamic type Co-authored-by: Pratik Nayak --- .../distributed/preconditioner/schwarz.cpp | 13 +++ core/test/preconditioner/ic.cpp | 41 +++++++ core/test/preconditioner/ilu.cpp | 46 ++++++++ core/test/solver/CMakeLists.txt | 1 + core/test/solver/bicg.cpp | 35 ++++-- core/test/solver/bicgstab.cpp | 46 ++++---- core/test/solver/cb_gmres.cpp | 59 +++++----- core/test/solver/cg.cpp | 53 +++++---- core/test/solver/cgs.cpp | 53 +++++---- core/test/solver/direct.cpp | 105 ++++++++++++++++++ core/test/solver/fcg.cpp | 53 +++++---- core/test/solver/gcr.cpp | 49 ++++---- core/test/solver/gmres.cpp | 49 ++++---- core/test/solver/idr.cpp | 58 +++++----- core/test/solver/ir.cpp | 77 ++++++------- core/test/solver/multigrid.cpp | 28 +++++ core/test/solver/workspace.cpp | 6 +- core/test/utils/assertions.hpp | 88 +++++++++++++++ .../performance-debugging.cpp | 3 +- reference/test/matrix/csr_kernels.cpp | 5 +- reference/test/solver/cb_gmres_kernels.cpp | 41 +++---- reference/test/solver/gcr_kernels.cpp | 21 ++-- reference/test/solver/gmres_kernels.cpp | 39 +++---- reference/test/solver/ir_kernels.cpp | 47 ++++---- test/matrix/matrix.cpp | 2 +- 25 files changed, 675 insertions(+), 343 deletions(-) create mode 100644 core/test/solver/direct.cpp diff --git a/core/test/mpi/distributed/preconditioner/schwarz.cpp b/core/test/mpi/distributed/preconditioner/schwarz.cpp index 5c354b11748..16b0af91b74 100644 --- a/core/test/mpi/distributed/preconditioner/schwarz.cpp +++ b/core/test/mpi/distributed/preconditioner/schwarz.cpp @@ -160,4 +160,17 @@ TYPED_TEST(SchwarzFactory, CanBeCleared) } +TYPED_TEST(SchwarzFactory, PassExplicitFactory) +{ + using Jacobi = typename TestFixture::Jacobi; + using Schwarz = typename TestFixture::Schwarz; + auto jacobi_factory = gko::share(Jacobi::build().on(this->exec)); + + auto factory = + Schwarz::build().with_local_solver(jacobi_factory).on(this->exec); + + ASSERT_EQ(factory->get_parameters().local_solver, jacobi_factory); +} + + } // namespace diff --git a/core/test/preconditioner/ic.cpp b/core/test/preconditioner/ic.cpp index 9e1e3f3e3c4..dfcb5e5af3f 100644 --- a/core/test/preconditioner/ic.cpp +++ b/core/test/preconditioner/ic.cpp @@ -33,6 +33,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + +#endif +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 5211, 4973, 4974) +#endif + + #include @@ -44,6 +55,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/test/utils.hpp" + + namespace { @@ -95,4 +109,31 @@ TEST_F(IcFactory, CanSetFactorizationFactory) } +TEST_F(IcFactory, DeprecatedFactoryParameter) +{ + auto ilu_factory = ic_prec_type::build() + .with_l_solver_factory(this->l_factory) + .with_factorization_factory(this->fact_factory) + .on(this->exec); + + ASSERT_EQ(ilu_factory->get_parameters().l_solver_factory, this->l_factory); + ASSERT_EQ(ilu_factory->get_parameters().factorization_factory, + this->fact_factory); +} + + +TEST_F(IcFactory, DeferredFactoryParameter) +{ + auto ic_factory = ic_prec_type::build() + .with_l_solver(solver_type::build()) + .with_factorization(ic_type::build()) + .on(this->exec); + + GKO_ASSERT_DYNAMIC_TYPE(ic_factory->get_parameters().l_solver_factory, + solver_type::Factory); + GKO_ASSERT_DYNAMIC_TYPE(ic_factory->get_parameters().factorization_factory, + ic_type::Factory); +} + + } // namespace diff --git a/core/test/preconditioner/ilu.cpp b/core/test/preconditioner/ilu.cpp index f25a20b47e3..dec3c8532d2 100644 --- a/core/test/preconditioner/ilu.cpp +++ b/core/test/preconditioner/ilu.cpp @@ -33,6 +33,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + +#endif +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 5211, 4973, 4974) +#endif + + #include @@ -44,6 +55,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/test/utils.hpp" + + namespace { @@ -108,4 +122,36 @@ TEST_F(IluFactory, CanSetFactorizationFactory) } +TEST_F(IluFactory, DeprecatedFactoryParameter) +{ + auto ilu_factory = ilu_prec_type::build() + .with_l_solver_factory(this->l_factory) + .with_u_solver_factory(this->u_factory) + .with_factorization_factory(this->fact_factory) + .on(this->exec); + + ASSERT_EQ(ilu_factory->get_parameters().l_solver_factory, this->l_factory); + ASSERT_EQ(ilu_factory->get_parameters().u_solver_factory, this->u_factory); + ASSERT_EQ(ilu_factory->get_parameters().factorization_factory, + this->fact_factory); +} + + +TEST_F(IluFactory, DeferredFactoryParameter) +{ + auto ilu_factory = ilu_prec_type::build() + .with_l_solver(l_solver_type::build()) + .with_u_solver(u_solver_type::build()) + .with_factorization(ilu_type::build()) + .on(this->exec); + + GKO_ASSERT_DYNAMIC_TYPE(ilu_factory->get_parameters().l_solver_factory, + l_solver_type::Factory); + GKO_ASSERT_DYNAMIC_TYPE(ilu_factory->get_parameters().u_solver_factory, + u_solver_type::Factory); + GKO_ASSERT_DYNAMIC_TYPE(ilu_factory->get_parameters().factorization_factory, + ilu_type::Factory); +} + + } // namespace diff --git a/core/test/solver/CMakeLists.txt b/core/test/solver/CMakeLists.txt index 4ca8763e2ee..f4e6b2e5b7b 100644 --- a/core/test/solver/CMakeLists.txt +++ b/core/test/solver/CMakeLists.txt @@ -2,6 +2,7 @@ ginkgo_create_test(bicg) ginkgo_create_test(bicgstab) ginkgo_create_test(cg) ginkgo_create_test(cgs) +ginkgo_create_test(direct) ginkgo_create_test(fcg) ginkgo_create_test(gcr) ginkgo_create_test(gmres) diff --git a/core/test/solver/bicg.cpp b/core/test/solver/bicg.cpp index 9e49b118484..c13070fad1e 100644 --- a/core/test/solver/bicg.cpp +++ b/core/test/solver/bicg.cpp @@ -164,18 +164,12 @@ TYPED_TEST(Bicg, CanSetPreconditionerGenerator) using value_type = typename TestFixture::value_type; auto bicg_factory = Solver::build() - .with_criteria( - 
gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor( - gko::remove_complex(1e-6)) - .on(this->exec)) - .with_preconditioner( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) - .on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u), + gko::stop::ResidualNorm::build() + .with_reduction_factor( + gko::remove_complex(1e-6))) + .with_preconditioner(Solver::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(3u))) .on(this->exec); auto solver = bicg_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -291,4 +285,21 @@ TYPED_TEST(Bicg, CanSetPreconditioner) } +TYPED_TEST(Bicg, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + auto stop_factory = gko::share( + gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)); + auto precond_factory = gko::share(Solver::build().on(this->exec)); + + auto factory = Solver::build() + .with_criteria(stop_factory) + .with_preconditioner(precond_factory) + .on(this->exec); + + ASSERT_EQ(factory->get_parameters().criteria.front(), stop_factory); + ASSERT_EQ(factory->get_parameters().preconditioner, precond_factory); +} + + } // namespace diff --git a/core/test/solver/bicgstab.cpp b/core/test/solver/bicgstab.cpp index d5b489feff9..b420ccfc49e 100644 --- a/core/test/solver/bicgstab.cpp +++ b/core/test/solver/bicgstab.cpp @@ -159,14 +159,9 @@ TYPED_TEST(Bicgstab, CanSetPreconditionerGenerator) using value_type = typename TestFixture::value_type; auto bicgstab_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) - .with_preconditioner( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) - ) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) + .with_preconditioner(Solver::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(3u))) .on(this->exec); auto solver = bicgstab_factory->generate(this->mtx); @@ -207,15 +202,13 @@ TYPED_TEST(Bicgstab, CanSetPreconditionerInFactory) using Solver = typename TestFixture::Solver; std::shared_ptr bicgstab_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto bicgstab_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(bicgstab_precond) .on(this->exec); auto solver = bicgstab_factory->generate(this->mtx); @@ -234,15 +227,13 @@ TYPED_TEST(Bicgstab, ThrowsOnWrongPreconditionerInFactory) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr bicgstab_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto bicgstab_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(bicgstab_precond) .on(this->exec); @@ -267,15 +258,13 @@ TYPED_TEST(Bicgstab, CanSetPreconditioner) using Solver = typename TestFixture::Solver; std::shared_ptr bicgstab_precond = Solver::build() - .with_criteria( - 
gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto bicgstab_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = bicgstab_factory->generate(this->mtx); solver->set_preconditioner(bicgstab_precond); @@ -286,4 +275,21 @@ TYPED_TEST(Bicgstab, CanSetPreconditioner) } +TYPED_TEST(Bicgstab, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + auto stop_factory = gko::share( + gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)); + auto precond_factory = gko::share(Solver::build().on(this->exec)); + + auto factory = Solver::build() + .with_criteria(stop_factory) + .with_preconditioner(precond_factory) + .on(this->exec); + + ASSERT_EQ(factory->get_parameters().criteria.front(), stop_factory); + ASSERT_EQ(factory->get_parameters().preconditioner, precond_factory); +} + + } // namespace diff --git a/core/test/solver/cb_gmres.cpp b/core/test/solver/cb_gmres.cpp index 5f6076f248c..434544b3ca2 100644 --- a/core/test/solver/cb_gmres.cpp +++ b/core/test/solver/cb_gmres.cpp @@ -223,18 +223,12 @@ TYPED_TEST(CbGmres, CanSetPreconditionerGenerator) using Solver = typename TestFixture::Solver; auto cb_gmres_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on(this->exec), - gko::stop::ResidualNorm::build() - .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(nc_value_type{1e-6}) - .on(this->exec)) - .with_preconditioner( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) - .on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u), + gko::stop::ResidualNorm::build() + .with_baseline(gko::stop::mode::initial_resnorm) + .with_reduction_factor(nc_value_type{1e-6})) + .with_preconditioner(Solver::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(3u))) .on(this->exec); auto solver = cb_gmres_factory->generate(this->mtx); auto precond = @@ -256,8 +250,7 @@ TYPED_TEST(CbGmres, CanSetKrylovDim) auto cb_gmres_factory = Solver::build() .with_krylov_dim(new_krylov_dim) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = cb_gmres_factory->generate(this->mtx); @@ -276,8 +269,7 @@ TYPED_TEST(CbGmres, CanUseSetKrylovDim) const gko::size_type new_krylov_dim{40u}; auto cb_gmres_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = cb_gmres_factory->generate(this->mtx); @@ -295,15 +287,13 @@ TYPED_TEST(CbGmres, CanSetPreconditionerInFactory) using Solver = typename TestFixture::Solver; std::shared_ptr cb_gmres_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cb_gmres_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cb_gmres_precond) .on(this->exec); auto solver = cb_gmres_factory->generate(this->mtx); @@ -322,15 +312,13 @@ 
TYPED_TEST(CbGmres, ThrowsOnWrongPreconditionerInFactory) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr cb_gmres_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto cb_gmres_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cb_gmres_precond) .on(this->exec); @@ -343,15 +331,13 @@ TYPED_TEST(CbGmres, CanSetPreconditioner) using Solver = typename TestFixture::Solver; std::shared_ptr cb_gmres_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cb_gmres_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = cb_gmres_factory->generate(this->mtx); solver->set_preconditioner(cb_gmres_precond); @@ -362,4 +348,21 @@ TYPED_TEST(CbGmres, CanSetPreconditioner) } +TYPED_TEST(CbGmres, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + auto stop_factory = gko::share( + gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)); + auto precond_factory = gko::share(Solver::build().on(this->exec)); + + auto factory = Solver::build() + .with_criteria(stop_factory) + .with_preconditioner(precond_factory) + .on(this->exec); + + ASSERT_EQ(factory->get_parameters().criteria.front(), stop_factory); + ASSERT_EQ(factory->get_parameters().preconditioner, precond_factory); +} + + } // namespace diff --git a/core/test/solver/cg.cpp b/core/test/solver/cg.cpp index d1d7dbee344..f94694e775b 100644 --- a/core/test/solver/cg.cpp +++ b/core/test/solver/cg.cpp @@ -163,18 +163,12 @@ TYPED_TEST(Cg, CanSetPreconditionerGenerator) using value_type = typename TestFixture::value_type; auto cg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u), - gko::stop::ResidualNorm::build() - .with_reduction_factor( - gko::remove_complex(1e-6)) - ) - .with_preconditioner( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) - ) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u), + gko::stop::ResidualNorm::build() + .with_reduction_factor( + gko::remove_complex(1e-6))) + .with_preconditioner(Solver::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(3u))) .on(this->exec); auto solver = cg_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -193,15 +187,13 @@ TYPED_TEST(Cg, CanSetPreconditionerInFactory) using Solver = typename TestFixture::Solver; std::shared_ptr cg_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cg_precond) .on(this->exec); auto solver = cg_factory->generate(this->mtx); @@ -244,15 +236,13 @@ TYPED_TEST(Cg, ThrowsOnWrongPreconditionerInFactory) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr 
cg_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto cg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cg_precond) .on(this->exec); @@ -277,15 +267,13 @@ TYPED_TEST(Cg, CanSetPreconditioner) using Solver = typename TestFixture::Solver; std::shared_ptr cg_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = cg_factory->generate(this->mtx); solver->set_preconditioner(cg_precond); @@ -296,4 +284,21 @@ TYPED_TEST(Cg, CanSetPreconditioner) } +TYPED_TEST(Cg, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + auto stop_factory = gko::share( + gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)); + auto precond_factory = gko::share(Solver::build().on(this->exec)); + + auto factory = Solver::build() + .with_criteria(stop_factory) + .with_preconditioner(precond_factory) + .on(this->exec); + + ASSERT_EQ(factory->get_parameters().criteria.front(), stop_factory); + ASSERT_EQ(factory->get_parameters().preconditioner, precond_factory); +} + + } // namespace diff --git a/core/test/solver/cgs.cpp b/core/test/solver/cgs.cpp index 705e9f850c8..6216899d898 100644 --- a/core/test/solver/cgs.cpp +++ b/core/test/solver/cgs.cpp @@ -163,18 +163,12 @@ TYPED_TEST(Cgs, CanSetPreconditionerGenerator) using value_type = typename TestFixture::value_type; auto cgs_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u), - gko::stop::ResidualNorm::build() - .with_reduction_factor( - gko::remove_complex(1e-6)) - ) - .with_preconditioner( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) - ) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u), + gko::stop::ResidualNorm::build() + .with_reduction_factor( + gko::remove_complex(1e-6))) + .with_preconditioner(Solver::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(3u))) .on(this->exec); auto solver = cgs_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -217,15 +211,13 @@ TYPED_TEST(Cgs, CanSetPreconditionerInFactory) using Solver = typename TestFixture::Solver; std::shared_ptr cgs_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cgs_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cgs_precond) .on(this->exec); auto solver = cgs_factory->generate(this->mtx); @@ -244,15 +236,13 @@ TYPED_TEST(Cgs, ThrowsOnWrongPreconditionerInFactory) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr cgs_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) 
.on(this->exec) ->generate(wrong_sized_mtx); auto cgs_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(cgs_precond) .on(this->exec); @@ -277,15 +267,13 @@ TYPED_TEST(Cgs, CanSetPreconditioner) using Solver = typename TestFixture::Solver; std::shared_ptr cgs_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto cgs_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = cgs_factory->generate(this->mtx); solver->set_preconditioner(cgs_precond); @@ -296,4 +284,21 @@ TYPED_TEST(Cgs, CanSetPreconditioner) } +TYPED_TEST(Cgs, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + auto stop_factory = gko::share( + gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)); + auto precond_factory = gko::share(Solver::build().on(this->exec)); + + auto factory = Solver::build() + .with_criteria(stop_factory) + .with_preconditioner(precond_factory) + .on(this->exec); + + ASSERT_EQ(factory->get_parameters().criteria.front(), stop_factory); + ASSERT_EQ(factory->get_parameters().preconditioner, precond_factory); +} + + } // namespace diff --git a/core/test/solver/direct.cpp b/core/test/solver/direct.cpp new file mode 100644 index 00000000000..a4110c8c18d --- /dev/null +++ b/core/test/solver/direct.cpp @@ -0,0 +1,105 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class Direct : public ::testing::Test { +protected: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Solver = gko::experimental::solver::Direct; + using Lu = gko::experimental::factorization::Lu; + + Direct() + : exec(gko::ReferenceExecutor::create()), + factory(Solver::build().with_factorization(Lu::build()).on(exec)) + {} + + std::shared_ptr exec; + std::unique_ptr factory; +}; + +TYPED_TEST_SUITE(Direct, gko::test::ValueIndexTypes, PairTypenameNameGenerator); + + +TYPED_TEST(Direct, FactoryKnowsItsExecutor) +{ + ASSERT_EQ(this->factory->get_executor(), this->exec); +} + + +TYPED_TEST(Direct, ThrowsOnRectangularMatrixInFactory) +{ + using Mtx = gko::matrix::Csr; + std::shared_ptr rectangular_matrix = + Mtx::create(this->exec, gko::dim<2>{1, 2}, 0); + + ASSERT_THROW(this->factory->generate(rectangular_matrix), + gko::DimensionMismatch); +} + + +TYPED_TEST(Direct, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + using Lu = typename TestFixture::Lu; + auto lu_factory = gko::share(Lu::build().on(this->exec)); + + auto factory = + Solver::build().with_factorization(lu_factory).on(this->exec); + + ASSERT_EQ(factory->get_parameters().factorization, lu_factory); +} + + +} // namespace diff --git a/core/test/solver/fcg.cpp b/core/test/solver/fcg.cpp index 4ba3f389ecd..87f27c2bacd 100644 --- a/core/test/solver/fcg.cpp +++ b/core/test/solver/fcg.cpp @@ -162,18 +162,12 @@ TYPED_TEST(Fcg, CanSetPreconditionerGenerator) using value_type = typename TestFixture::value_type; auto fcg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u), - gko::stop::ResidualNorm::build() - .with_reduction_factor( - gko::remove_complex(1e-6)) - ) - .with_preconditioner( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) - ) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u), + gko::stop::ResidualNorm::build() + .with_reduction_factor( + gko::remove_complex(1e-6))) + .with_preconditioner(Solver::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(3u))) .on(this->exec); auto solver = fcg_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -216,15 +210,13 @@ TYPED_TEST(Fcg, CanSetPreconditionerInFactory) using Solver = typename TestFixture::Solver; std::shared_ptr fcg_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto fcg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(fcg_precond) .on(this->exec); auto solver = fcg_factory->generate(this->mtx); @@ -243,15 +235,13 @@ TYPED_TEST(Fcg, ThrowsOnWrongPreconditionerInFactory) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr fcg_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto fcg_factory = Solver::build() - .with_criteria( 
- gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(fcg_precond) .on(this->exec); @@ -276,15 +266,13 @@ TYPED_TEST(Fcg, CanSetPreconditioner) using Solver = typename TestFixture::Solver; std::shared_ptr fcg_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto fcg_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = fcg_factory->generate(this->mtx); solver->set_preconditioner(fcg_precond); @@ -295,4 +283,21 @@ TYPED_TEST(Fcg, CanSetPreconditioner) } +TYPED_TEST(Fcg, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + auto stop_factory = gko::share( + gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)); + auto precond_factory = gko::share(Solver::build().on(this->exec)); + + auto factory = Solver::build() + .with_criteria(stop_factory) + .with_preconditioner(precond_factory) + .on(this->exec); + + ASSERT_EQ(factory->get_parameters().criteria.front(), stop_factory); + ASSERT_EQ(factory->get_parameters().preconditioner, precond_factory); +} + + } // namespace diff --git a/core/test/solver/gcr.cpp b/core/test/solver/gcr.cpp index 554d5aa9526..4c08863f09b 100644 --- a/core/test/solver/gcr.cpp +++ b/core/test/solver/gcr.cpp @@ -196,14 +196,9 @@ TYPED_TEST(Gcr, CanSetPreconditionerGenerator) .with_criteria( gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() - .with_reduction_factor(TestFixture::reduction_factor) - ) - .with_preconditioner( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) - ) + .with_reduction_factor(TestFixture::reduction_factor)) + .with_preconditioner(Solver::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(3u))) .on(this->exec); auto solver = gcr_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -251,8 +246,7 @@ TYPED_TEST(Gcr, CanSetKrylovDim) .with_criteria( gko::stop::Iteration::build().with_max_iters(4u), gko::stop::ResidualNorm::build() - .with_reduction_factor(TestFixture::reduction_factor) - ) + .with_reduction_factor(TestFixture::reduction_factor)) .on(this->exec); auto solver = gcr_factory->generate(this->mtx); auto krylov_dim = solver->get_krylov_dim(); @@ -285,15 +279,13 @@ TYPED_TEST(Gcr, CanSetPreconditionerInFactory) using Solver = typename TestFixture::Solver; std::shared_ptr gcr_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto gcr_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(gcr_precond) .on(this->exec); auto solver = gcr_factory->generate(this->mtx); @@ -312,15 +304,13 @@ TYPED_TEST(Gcr, ThrowsOnWrongPreconditionerInFactory) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr gcr_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto gcr_factory = 
Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(gcr_precond) .on(this->exec); @@ -345,15 +335,13 @@ TYPED_TEST(Gcr, CanSetPreconditioner) using Solver = typename TestFixture::Solver; std::shared_ptr gcr_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto gcr_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = gcr_factory->generate(this->mtx); solver->set_preconditioner(gcr_precond); @@ -364,4 +352,21 @@ TYPED_TEST(Gcr, CanSetPreconditioner) } +TYPED_TEST(Gcr, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + auto stop_factory = gko::share( + gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)); + auto precond_factory = gko::share(Solver::build().on(this->exec)); + + auto factory = Solver::build() + .with_criteria(stop_factory) + .with_preconditioner(precond_factory) + .on(this->exec); + + ASSERT_EQ(factory->get_parameters().criteria.front(), stop_factory); + ASSERT_EQ(factory->get_parameters().preconditioner, precond_factory); +} + + } // namespace diff --git a/core/test/solver/gmres.cpp b/core/test/solver/gmres.cpp index c2d62b3bb45..2464bb7273d 100644 --- a/core/test/solver/gmres.cpp +++ b/core/test/solver/gmres.cpp @@ -182,14 +182,9 @@ TYPED_TEST(Gmres, CanSetPreconditionerGenerator) .with_criteria( gko::stop::Iteration::build().with_max_iters(3u), gko::stop::ResidualNorm::build() - .with_reduction_factor(TestFixture::reduction_factor) - ) - .with_preconditioner( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) - ) + .with_reduction_factor(TestFixture::reduction_factor)) + .with_preconditioner(Solver::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(3u))) .on(this->exec); auto solver = gmres_factory->generate(this->mtx); auto precond = dynamic_cast*>( @@ -238,8 +233,7 @@ TYPED_TEST(Gmres, CanSetKrylovDim) .with_criteria( gko::stop::Iteration::build().with_max_iters(4u), gko::stop::ResidualNorm::build() - .with_reduction_factor(TestFixture::reduction_factor) - ) + .with_reduction_factor(TestFixture::reduction_factor)) .on(this->exec); auto solver = gmres_factory->generate(this->mtx); auto krylov_dim = solver->get_krylov_dim(); @@ -272,15 +266,13 @@ TYPED_TEST(Gmres, CanSetPreconditionerInFactory) using Solver = typename TestFixture::Solver; std::shared_ptr gmres_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto gmres_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(gmres_precond) .on(this->exec); auto solver = gmres_factory->generate(this->mtx); @@ -299,15 +291,13 @@ TYPED_TEST(Gmres, ThrowsOnWrongPreconditionerInFactory) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr gmres_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) 
.on(this->exec) ->generate(wrong_sized_mtx); auto gmres_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(gmres_precond) .on(this->exec); @@ -332,15 +322,13 @@ TYPED_TEST(Gmres, CanSetPreconditioner) using Solver = typename TestFixture::Solver; std::shared_ptr gmres_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto gmres_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = gmres_factory->generate(this->mtx); solver->set_preconditioner(gmres_precond); @@ -351,4 +339,21 @@ TYPED_TEST(Gmres, CanSetPreconditioner) } +TYPED_TEST(Gmres, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + auto stop_factory = gko::share( + gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)); + auto precond_factory = gko::share(Solver::build().on(this->exec)); + + auto factory = Solver::build() + .with_criteria(stop_factory) + .with_preconditioner(precond_factory) + .on(this->exec); + + ASSERT_EQ(factory->get_parameters().criteria.front(), stop_factory); + ASSERT_EQ(factory->get_parameters().preconditioner, precond_factory); +} + + } // namespace diff --git a/core/test/solver/idr.cpp b/core/test/solver/idr.cpp index a93978fa335..5552f6f1c0a 100644 --- a/core/test/solver/idr.cpp +++ b/core/test/solver/idr.cpp @@ -161,14 +161,9 @@ TYPED_TEST(Idr, CanSetPreconditionerGenerator) using value_type = typename TestFixture::value_type; auto idr_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) - .with_preconditioner( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) - ) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) + .with_preconditioner(Solver::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(3u))) .on(this->exec); auto solver = idr_factory->generate(this->mtx); @@ -208,15 +203,13 @@ TYPED_TEST(Idr, CanSetPreconditionerInFactory) using Solver = typename TestFixture::Solver; std::shared_ptr idr_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto idr_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(idr_precond) .on(this->exec); auto solver = idr_factory->generate(this->mtx); @@ -235,15 +228,13 @@ TYPED_TEST(Idr, ThrowsOnWrongPreconditionerInFactory) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr idr_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto idr_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_preconditioner(idr_precond) .on(this->exec); @@ -256,15 +247,13 @@ TYPED_TEST(Idr, CanSetPreconditioner) using Solver = 
typename TestFixture::Solver; std::shared_ptr idr_precond = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto idr_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = idr_factory->generate(this->mtx); solver->set_preconditioner(idr_precond); @@ -282,8 +271,7 @@ TYPED_TEST(Idr, CanSetSubspaceDim) auto idr_factory = Solver::build() .with_subspace_dim(8u) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = idr_factory->generate(this->mtx); auto subspace_dim = solver->get_subspace_dim(); @@ -319,8 +307,7 @@ TYPED_TEST(Idr, CanSetKappa) auto idr_factory = Solver::build() .with_kappa(real_type{0.05}) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = idr_factory->generate(this->mtx); auto kappa = solver->get_kappa(); @@ -358,8 +345,7 @@ TYPED_TEST(Idr, CanSetDeterministic) auto idr_factory = Solver::build() .with_deterministic(true) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = idr_factory->generate(this->mtx); auto deterministic = solver->get_deterministic(); @@ -395,8 +381,7 @@ TYPED_TEST(Idr, CanSetComplexSubspace) auto idr_factory = Solver::build() .with_complex_subspace(true) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(4u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(4u)) .on(this->exec); auto solver = idr_factory->generate(this->mtx); auto complex_subspace = solver->get_complex_subspace(); @@ -425,4 +410,21 @@ TYPED_TEST(Idr, CanSetComplexSubspaceAgain) } +TYPED_TEST(Idr, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + auto stop_factory = gko::share( + gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)); + auto precond_factory = gko::share(Solver::build().on(this->exec)); + + auto factory = Solver::build() + .with_criteria(stop_factory) + .with_preconditioner(precond_factory) + .on(this->exec); + + ASSERT_EQ(factory->get_parameters().criteria.front(), stop_factory); + ASSERT_EQ(factory->get_parameters().preconditioner, precond_factory); +} + + } // namespace diff --git a/core/test/solver/ir.cpp b/core/test/solver/ir.cpp index 93ea3e89b10..171c0c92b00 100644 --- a/core/test/solver/ir.cpp +++ b/core/test/solver/ir.cpp @@ -162,17 +162,11 @@ TYPED_TEST(Ir, CanSetInnerSolverInFactory) using value_type = typename TestFixture::value_type; auto ir_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - ) - .with_solver( - Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) - ) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) + .with_solver(Solver::build().with_criteria( + gko::stop::Iteration::build().with_max_iters(3u))) .on(this->exec); auto solver = ir_factory->generate(this->mtx); auto inner_solver = dynamic_cast( @@ -189,15 +183,13 @@ 
TYPED_TEST(Ir, CanSetGeneratedInnerSolverInFactory) using Solver = typename TestFixture::Solver; std::shared_ptr ir_solver = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto ir_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_solver(ir_solver) .on(this->exec); auto solver = ir_factory->generate(this->mtx); @@ -240,15 +232,13 @@ TYPED_TEST(Ir, ThrowsOnWrongInnerSolverInFactory) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr ir_solver = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto ir_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_generated_solver(ir_solver) .on(this->exec); @@ -261,15 +251,13 @@ TYPED_TEST(Ir, CanSetInnerSolver) using Solver = typename TestFixture::Solver; std::shared_ptr ir_solver = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(this->mtx); auto ir_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = ir_factory->generate(this->mtx); solver->set_solver(ir_solver); @@ -289,9 +277,7 @@ TYPED_TEST(Ir, CanSetApplyWithInitialGuessMode) initial_guess_mode::zero}) { auto ir_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u).on( - this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .with_default_initial_guess(guess) .on(this->exec); auto solver = ir_factory->generate(this->mtx); @@ -310,15 +296,13 @@ TYPED_TEST(Ir, ThrowOnWrongInnerSolverSet) Mtx::create(this->exec, gko::dim<2>{2, 2}); std::shared_ptr ir_solver = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec) ->generate(wrong_sized_mtx); auto ir_factory = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto solver = ir_factory->generate(this->mtx); @@ -345,11 +329,9 @@ TYPED_TEST(Ir, DefaultRelaxationFactor) auto richardson = gko::solver::Richardson::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - ) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) .on(this->exec) ->generate(this->mtx); @@ -364,11 +346,9 @@ TYPED_TEST(Ir, UseAsRichardson) auto richardson = gko::solver::Richardson::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - ) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) .with_relaxation_factor(relaxation_factor) 
.on(this->exec) ->generate(this->mtx); @@ -495,4 +475,21 @@ TYPED_TEST(Ir, RunResidualNormCheckCorrectTimes) } +TYPED_TEST(Ir, PassExplicitFactory) +{ + using Solver = typename TestFixture::Solver; + auto stop_factory = gko::share( + gko::stop::Iteration::build().with_max_iters(1u).on(this->exec)); + auto inner_solver_factory = gko::share(Solver::build().on(this->exec)); + + auto factory = Solver::build() + .with_criteria(stop_factory) + .with_solver(inner_solver_factory) + .on(this->exec); + + ASSERT_EQ(factory->get_parameters().criteria.front(), stop_factory); + ASSERT_EQ(factory->get_parameters().solver, inner_solver_factory); +} + + } // namespace diff --git a/core/test/solver/multigrid.cpp b/core/test/solver/multigrid.cpp index 9f7bddb633c..bab6bcaf863 100644 --- a/core/test/solver/multigrid.cpp +++ b/core/test/solver/multigrid.cpp @@ -902,4 +902,32 @@ TYPED_TEST(Multigrid, CustomCoarsestSolverSelector) } +TYPED_TEST(Multigrid, DeferredFactoryParameter) +{ + using Solver = typename TestFixture::Solver; + using DummyRPFactory = typename TestFixture::DummyRPFactory; + using DummyFactory = typename TestFixture::DummyFactory; + + auto solver = Solver::build() + .with_mg_level(DummyRPFactory::build()) + .with_pre_smoother(DummyFactory::build()) + .with_mid_smoother(DummyFactory::build()) + .with_post_smoother(DummyFactory::build()) + .with_criteria(gko::stop::Iteration::build()) + .with_coarsest_solver(DummyFactory::build()) + .on(this->exec); + + GKO_ASSERT_DYNAMIC_TYPE(solver->get_parameters().mg_level[0], + typename DummyRPFactory::Factory); + GKO_ASSERT_DYNAMIC_TYPE(solver->get_parameters().pre_smoother[0], + typename DummyFactory::Factory); + GKO_ASSERT_DYNAMIC_TYPE(solver->get_parameters().mid_smoother[0], + typename DummyFactory::Factory); + GKO_ASSERT_DYNAMIC_TYPE(solver->get_parameters().post_smoother[0], + typename DummyFactory::Factory); + GKO_ASSERT_DYNAMIC_TYPE(solver->get_parameters().coarsest_solver[0], + typename DummyFactory::Factory); +} + + } // namespace diff --git a/core/test/solver/workspace.cpp b/core/test/solver/workspace.cpp index ffbab815dc6..3dc53fb6abe 100644 --- a/core/test/solver/workspace.cpp +++ b/core/test/solver/workspace.cpp @@ -256,8 +256,8 @@ TEST_F(Workspace, CanCreateOperators) ASSERT_EQ(op2->get_size(), size2); ASSERT_EQ(op1->get_stride(), stride1); ASSERT_EQ(op2->get_stride(), stride2); - ASSERT_EQ(typeid(*op1), typeid(DummyLinOp)); - ASSERT_EQ(typeid(*op2), typeid(DummyLinOp2)); + GKO_ASSERT_DYNAMIC_TYPE(op1, DummyLinOp); + GKO_ASSERT_DYNAMIC_TYPE(op2, DummyLinOp2); ASSERT_EQ(op1, ws.get_op(1)); ASSERT_EQ(op2, ws.get_op(0)); } @@ -288,7 +288,7 @@ TEST_F(Workspace, ChecksExactOperatorType) 0, [&] { return std::make_unique(exec); }, typeid(DerivedDummyLinOp), {}, 0); - ASSERT_EQ(typeid(*op1), typeid(DerivedDummyLinOp)); + GKO_ASSERT_DYNAMIC_TYPE(op1, DerivedDummyLinOp); } diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index 153907cf2cf..a0f700e629e 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -53,10 +53,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
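To illustrate the deferred factory parameters exercised by the IR and Multigrid tests above: stopping criteria, inner solvers and smoothers can now be passed as builders, and only the outermost factory needs an executor. A minimal usage sketch, not part of the patch; the solver choice, matrix A and iteration count are illustrative only:

    #include <ginkgo/ginkgo.hpp>

    // Sketch: criteria and the inner solver are deferred builders; the executor
    // attached by the outer .on(exec) is reused when the factory is generated.
    std::unique_ptr<gko::solver::Ir<double>> make_ir_solver(
        std::shared_ptr<const gko::Executor> exec,
        std::shared_ptr<gko::matrix::Csr<double>> A)
    {
        auto factory =
            gko::solver::Ir<double>::build()
                .with_criteria(gko::stop::Iteration::build().with_max_iters(10u))
                .with_solver(gko::solver::Cg<double>::build())
                .on(exec);
        return factory->generate(A);
    }

    // Pre-built factories can still be passed explicitly, as in the
    // PassExplicitFactory test above:
    //     auto stop = gko::share(
    //         gko::stop::Iteration::build().with_max_iters(1u).on(exec));
    //     auto factory =
    //         gko::solver::Ir<double>::build().with_criteria(stop).on(exec);
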
#include #include #include +#include #include "core/base/batch_utilities.hpp" #include "core/base/extended_float.hpp" +#include "ginkgo/core/base/name_demangling.hpp" namespace gko { @@ -1010,6 +1012,45 @@ ::testing::AssertionResult matrices_equal_sparsity( } +template +::testing::AssertionResult dynamic_type_eq(const std::string& expr1, + const std::string& expr2, + const Ptr1& ptr1, const Ptr2& ptr2) +{ + auto& ref1 = *ptr1; + auto& ref2 = *ptr2; + if (typeid(ref1) == typeid(ref2)) { + return ::testing::AssertionSuccess(); + } else { + return ::testing::AssertionFailure() + << "mismatching dynamic types\n" + << expr1 << " is\n\t" + << gko::name_demangling::get_type_name(typeid(ref1)) << "\n" + << expr2 << " is\n\t" + << gko::name_demangling::get_type_name(typeid(ref2)) << "\n"; + } +} + + +template +::testing::AssertionResult dynamic_type_is(const std::string& expr, + const std::string&, const Ptr& ptr, + const std::type_info& type) +{ + auto& ref = *ptr; + if (typeid(ref) == type) { + return ::testing::AssertionSuccess(); + } else { + return ::testing::AssertionFailure() + << "unexpected dynamic type\n" + << expr << " is\n\t" + << gko::name_demangling::get_type_name(typeid(ref)) << "\n" + << "but we expected\n\t" + << gko::name_demangling::get_type_name(type) << "\n"; + } +} + + namespace detail { @@ -1249,4 +1290,51 @@ T* plain_ptr(T* ptr) } +/** + * Checks if the dynamic types of the objects referenced by two pointers are + * equal. + * + * @param _ptr1 the first pointer + * @param _ptr2 the second pointer + */ +#define GKO_ASSERT_DYNAMIC_TYPE_EQ(_ptr1, _ptr2) \ + { \ + ASSERT_PRED_FORMAT2(::gko::test::assertions::dynamic_type_eq, _ptr1, \ + _ptr2); \ + } + + +/** + * @copydoc GKO_ASSERT_DYNAMIC_TYPE_EQ + */ +#define GKO_EXPECT_DYNAMIC_TYPE_EQ(_ptr1, _ptr2) \ + { \ + EXPECT_PRED_FORMAT2(::gko::test::assertions::dynamic_type_eq, _ptr1, \ + _ptr2); \ + } + + +/** + * Checks if the dynamic type of a pointer to an object matches a given type + * + * @param _ptr the pointer + * @param _type the expected type + */ +#define GKO_ASSERT_DYNAMIC_TYPE(_ptr, _type) \ + { \ + ASSERT_PRED_FORMAT2(::gko::test::assertions::dynamic_type_is, _ptr, \ + typeid(_type)); \ + } + + +/** + * @copydoc GKO_ASSERT_DYNAMIC_TYPE + */ +#define GKO_EXPECT_DYNAMIC_TYPE(_ptr, _type) \ + { \ + EXPECT_PRED_FORMAT2(::gko::test::assertions::dynamic_type_is, _ptr, \ + typeid(_type)); \ + } + + #endif // GKO_CORE_TEST_UTILS_ASSERTIONS_HPP_ diff --git a/examples/performance-debugging/performance-debugging.cpp b/examples/performance-debugging/performance-debugging.cpp index cb06ac6cc86..c8f741114d2 100644 --- a/examples/performance-debugging/performance-debugging.cpp +++ b/examples/performance-debugging/performance-debugging.cpp @@ -417,8 +417,7 @@ int main(int argc, char* argv[]) .with_criteria( gko::stop::ResidualNorm::build() .with_reduction_factor(reduction_factor), - gko::stop::Iteration::build().with_max_iters(max_iters).on( - exec)) + gko::stop::Iteration::build().with_max_iters(max_iters)) .with_preconditioner(preconditioner::create(exec)) .on(exec); diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index d56201ade02..d0265e462f2 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -56,6 +56,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
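The dynamic type assertions defined above replace handwritten typeid comparisons and print demangled type names on failure. A minimal sketch of their use, assuming it is compiled inside Ginkgo's own test tree where this header is available; the test name and operands are illustrative:

    #include "core/test/utils/assertions.hpp"

    #include <gtest/gtest.h>

    #include <ginkgo/ginkgo.hpp>

    TEST(DynamicTypeAssertions, Sketch)
    {
        auto exec = gko::ReferenceExecutor::create();
        std::unique_ptr<gko::LinOp> a = gko::matrix::Dense<double>::create(exec);
        std::unique_ptr<gko::LinOp> b = gko::matrix::Dense<double>::create(exec);

        // the pointee of a has the expected dynamic type
        GKO_ASSERT_DYNAMIC_TYPE(a, gko::matrix::Dense<double>);
        // the pointees of a and b have the same dynamic type
        GKO_EXPECT_DYNAMIC_TYPE_EQ(a, b);
    }
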
#include "core/matrix/csr_kernels.hpp" #include "core/matrix/csr_lookup.hpp" #include "core/test/utils.hpp" +#include "core/test/utils/assertions.hpp" namespace { @@ -810,7 +811,7 @@ TYPED_TEST(Csr, ConvertsToPrecision) GKO_ASSERT_MTX_NEAR(this->mtx2, res, residual); auto first_strategy = this->mtx2->get_strategy(); auto second_strategy = res->get_strategy(); - ASSERT_EQ(typeid(*first_strategy), typeid(*second_strategy)); + GKO_ASSERT_DYNAMIC_TYPE_EQ(first_strategy, second_strategy); } @@ -835,7 +836,7 @@ TYPED_TEST(Csr, MovesToPrecision) GKO_ASSERT_MTX_NEAR(this->mtx2, res, residual); auto first_strategy = this->mtx2->get_strategy(); auto second_strategy = res->get_strategy(); - ASSERT_EQ(typeid(*first_strategy), typeid(*second_strategy)); + GKO_ASSERT_DYNAMIC_TYPE_EQ(first_strategy, second_strategy); } diff --git a/reference/test/solver/cb_gmres_kernels.cpp b/reference/test/solver/cb_gmres_kernels.cpp index e5b933ad82c..60d2a32b9ee 100644 --- a/reference/test/solver/cb_gmres_kernels.cpp +++ b/reference/test/solver/cb_gmres_kernels.cpp @@ -266,16 +266,12 @@ TYPED_TEST(CbGmres, SolvesStencilSystem2) auto factory = gmres_type::build() .with_storage_precision(this->storage_prec) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - this->exec), - gko::stop::Time::build() - .with_time_limit(std::chrono::seconds(6)) - .on(this->exec), - gko::stop::ResidualNorm::build() - .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(this->reduction_factor()) - .on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(100u), + gko::stop::Time::build().with_time_limit( + std::chrono::seconds(6)), + gko::stop::ResidualNorm::build() + .with_baseline(gko::stop::mode::initial_resnorm) + .with_reduction_factor(this->reduction_factor())) .on(this->exec); auto solver = factory->generate(this->mtx2); auto b = gko::initialize({33.0, 20.0, 20.0}, this->exec); @@ -521,13 +517,10 @@ TYPED_TEST(CbGmres, SolvesBigDenseSystem1WithRestart) gmres_type::build() .with_krylov_dim(4u) .with_storage_precision(this->storage_prec) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(200u).on( - this->exec), - gko::stop::ResidualNorm::build() - .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(this->reduction_factor()) - .on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(200u), + gko::stop::ResidualNorm::build() + .with_baseline(gko::stop::mode::initial_resnorm) + .with_reduction_factor(this->reduction_factor())) .on(this->exec); auto solver = cb_gmres_factory_restart->generate(this->mtx_medium); auto b = gko::initialize( @@ -549,17 +542,13 @@ TYPED_TEST(CbGmres, SolvesWithPreconditioner) auto cb_gmres_factory_preconditioner = gmres_type::build() .with_storage_precision(this->storage_prec) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - this->exec), - gko::stop::ResidualNorm::build() - .with_baseline(gko::stop::mode::initial_resnorm) - .with_reduction_factor(this->reduction_factor()) - .on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(100u), + gko::stop::ResidualNorm::build() + .with_baseline(gko::stop::mode::initial_resnorm) + .with_reduction_factor(this->reduction_factor())) .with_preconditioner( gko::preconditioner::Jacobi::build() - .with_max_block_size(3u) - .on(this->exec)) + .with_max_block_size(3u)) .on(this->exec); auto solver = cb_gmres_factory_preconditioner->generate(this->mtx_big); auto b = gko::initialize( diff --git 
a/reference/test/solver/gcr_kernels.cpp b/reference/test/solver/gcr_kernels.cpp index adf5c35fd1d..8943a131d2b 100644 --- a/reference/test/solver/gcr_kernels.cpp +++ b/reference/test/solver/gcr_kernels.cpp @@ -574,12 +574,9 @@ TYPED_TEST(Gcr, SolvesBigDenseSystem1WithRestart) auto gcr_factory_restart = Solver::build() .with_krylov_dim(4u) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(200u).on( - this->exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(200u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) .on(this->exec); auto solver = gcr_factory_restart->generate(this->mtx_medium); auto b = gko::initialize( @@ -600,16 +597,12 @@ TYPED_TEST(Gcr, SolvesWithPreconditioner) using value_type = typename TestFixture::value_type; auto gcr_factory_preconditioner = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - this->exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(100u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) .with_preconditioner( gko::preconditioner::Jacobi::build() - .with_max_block_size(3u) - .on(this->exec)) + .with_max_block_size(3u)) .on(this->exec); auto solver = gcr_factory_preconditioner->generate(this->mtx_big); auto b = gko::initialize( diff --git a/reference/test/solver/gmres_kernels.cpp b/reference/test/solver/gmres_kernels.cpp index a99400e412b..c718d60343b 100644 --- a/reference/test/solver/gmres_kernels.cpp +++ b/reference/test/solver/gmres_kernels.cpp @@ -373,15 +373,15 @@ TYPED_TEST(Gmres, KernelMultiAxpy) this->small_final_iter_nums.get_data()[1] = restart; this->small_krylov_bases = gko::initialize( // restart+1 x rows x #rhs { - I{1, 10}, // 0, 0, x - I{2, 11}, // 0, 1, x - I{3, 12}, // 0, 2, x - I{4, 13}, // 1, 0, x - I{5, 14}, // 1, 1, x - I{6, 15}, // 1, 2, x - I{nan, nan}, // 2, 0, x - I{nan, nan}, // 2, 1, x - I{nan, nan}, // 2, 2, x + I{1, 10}, // 0, 0, x + I{2, 11}, // 0, 1, x + I{3, 12}, // 0, 2, x + I{4, 13}, // 1, 0, x + I{5, 14}, // 1, 1, x + I{6, 15}, // 1, 2, x + I{nan, nan}, // 2, 0, x + I{nan, nan}, // 2, 1, x + I{nan, nan}, // 2, 2, x }, this->exec); this->small_stop.get_data()[0].stop(7, false); @@ -718,12 +718,9 @@ TYPED_TEST(Gmres, SolvesBigDenseSystem1WithRestart) auto gmres_factory_restart = Solver::build() .with_krylov_dim(4u) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(200u).on( - this->exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(200u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) .on(this->exec); auto solver = gmres_factory_restart->generate(this->mtx_medium); auto b = gko::initialize( @@ -744,16 +741,12 @@ TYPED_TEST(Gmres, SolvesWithPreconditioner) using value_type = typename TestFixture::value_type; auto gmres_factory_preconditioner = Solver::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - this->exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(100u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value)) .with_preconditioner( gko::preconditioner::Jacobi::build() - .with_max_block_size(3u) - 
.on(this->exec)) + .with_max_block_size(3u)) .on(this->exec); auto solver = gmres_factory_preconditioner->generate(this->mtx_big); auto b = gko::initialize( diff --git a/reference/test/solver/ir_kernels.cpp b/reference/test/solver/ir_kernels.cpp index 4fae1bfdac8..fc6154f3366 100644 --- a/reference/test/solver/ir_kernels.cpp +++ b/reference/test/solver/ir_kernels.cpp @@ -184,11 +184,9 @@ TYPED_TEST(Ir, SolvesTriangularSystemWithIterativeInnerSolver) auto solver_factory = gko::solver::Ir::build() - .with_criteria(gko::stop::Iteration::build().with_max_iters(30u).on( - this->exec), + .with_criteria(gko::stop::Iteration::build().with_max_iters(30u), gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(this->exec)) + .with_reduction_factor(r::value)) .with_solver(inner_solver_factory) .on(this->exec); auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); @@ -351,16 +349,15 @@ TYPED_TEST(Ir, RichardsonSolvesTriangularSystem) { using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; - auto solver = gko::solver::Ir::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - this->exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(this->exec)) - .with_relaxation_factor(value_type{0.9}) - .on(this->exec) - ->generate(this->mtx); + auto solver = + gko::solver::Ir::build() + .with_criteria(gko::stop::Iteration::build().with_max_iters(100u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value) + .on(this->exec)) + .with_relaxation_factor(value_type{0.9}) + .on(this->exec) + ->generate(this->mtx); auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); @@ -383,12 +380,10 @@ TYPED_TEST(Ir, RichardsonSolvesTriangularSystemWithIterativeInnerSolver) .on(this->exec)); auto solver_factory = gko::solver::Ir::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(100u).on( - this->exec), - gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value) - .on(this->exec)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(100u), + gko::stop::ResidualNorm::build() + .with_reduction_factor(r::value) + .on(this->exec)) .with_relaxation_factor(value_type{0.9}) .with_solver(inner_solver_factory) .on(this->exec); @@ -407,8 +402,7 @@ TYPED_TEST(Ir, RichardsonTransposedSolvesTriangularSystem) using value_type = typename TestFixture::value_type; auto solver = gko::solver::Ir::build() - .with_criteria(gko::stop::Iteration::build().with_max_iters(30u).on( - this->exec), + .with_criteria(gko::stop::Iteration::build().with_max_iters(30u), gko::stop::ResidualNorm::build() .with_reduction_factor(r::value) .on(this->exec)) @@ -430,8 +424,7 @@ TYPED_TEST(Ir, RichardsonConjTransposedSolvesTriangularSystem) using value_type = typename TestFixture::value_type; auto solver = gko::solver::Ir::build() - .with_criteria(gko::stop::Iteration::build().with_max_iters(30u).on( - this->exec), + .with_criteria(gko::stop::Iteration::build().with_max_iters(30u), gko::stop::ResidualNorm::build() .with_reduction_factor(r::value) .on(this->exec)) @@ -454,8 +447,7 @@ TYPED_TEST(Ir, ApplyWithGivenInitialGuessModeIsEquivalentToRef) using initial_guess_mode = gko::solver::initial_guess_mode; auto ref_solver = gko::solver::Ir::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .on(this->exec) ->generate(this->mtx); auto b = 
gko::initialize({3.9, 9.0, 2.2}, this->exec); @@ -463,8 +455,7 @@ TYPED_TEST(Ir, ApplyWithGivenInitialGuessModeIsEquivalentToRef) initial_guess_mode::zero}) { auto solver = gko::solver::Ir::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .with_default_initial_guess(guess) .on(this->exec) ->generate(this->mtx); diff --git a/test/matrix/matrix.cpp b/test/matrix/matrix.cpp index 9192b2eeebe..9b78ae21d6c 100644 --- a/test/matrix/matrix.cpp +++ b/test/matrix/matrix.cpp @@ -155,7 +155,7 @@ struct CsrWithDefaultStrategy : CsrBase { CsrBase::assert_empty_state(mtx); auto first_strategy = mtx->create_default()->get_strategy(); auto second_strategy = mtx->get_strategy(); - ASSERT_EQ(typeid(*first_strategy), typeid(*second_strategy)); + GKO_ASSERT_DYNAMIC_TYPE_EQ(first_strategy, second_strategy); } }; From a2649159787e6e8814f097120cd203b61da3fbec Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Fri, 6 Oct 2023 16:10:45 +0000 Subject: [PATCH 318/583] Format files Co-authored-by: Tobias Ribizel --- core/test/log/papi.cpp | 6 ++---- core/test/utils/assertions.hpp | 4 ++-- reference/test/preconditioner/ilu.cpp | 6 ++---- reference/test/solver/gmres_kernels.cpp | 18 +++++++++--------- reference/test/solver/multigrid_kernels.cpp | 9 +++------ 5 files changed, 18 insertions(+), 25 deletions(-) diff --git a/core/test/log/papi.cpp b/core/test/log/papi.cpp index b4e51cdc31b..0928f35d1ba 100644 --- a/core/test/log/papi.cpp +++ b/core/test/log/papi.cpp @@ -471,8 +471,7 @@ TYPED_TEST(Papi, CatchesLinOpFactoryGenerateStarted) { auto factory = gko::solver::Bicgstab::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); auto str = this->init(gko::log::Logger::linop_factory_generate_started_mask, "linop_factory_generate_started", factory.get()); @@ -492,8 +491,7 @@ TYPED_TEST(Papi, CatchesLinOpFactoryGenerateCompleted) { auto factory = gko::solver::Bicgstab::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(3u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(3u)) .on(this->exec); TypeParam dummy; auto str = diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index a0f700e629e..d723d5a8964 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -43,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include @@ -52,13 +53,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
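The same deferral applies to nested preconditioner factories, as in the CB-GMRES, GCR and GMRES tests above: the Jacobi builder no longer carries its own .on(exec). A short sketch, again illustrative rather than part of the patch; exec is an executor created elsewhere:

    auto gmres_factory =
        gko::solver::Gmres<double>::build()
            .with_criteria(gko::stop::Iteration::build().with_max_iters(100u),
                           gko::stop::ResidualNorm<double>::build()
                               .with_reduction_factor(1e-12))
            .with_preconditioner(gko::preconditioner::Jacobi<double>::build()
                                     .with_max_block_size(3u))
            .on(exec);
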
#include #include #include +#include #include -#include #include "core/base/batch_utilities.hpp" #include "core/base/extended_float.hpp" -#include "ginkgo/core/base/name_demangling.hpp" namespace gko { diff --git a/reference/test/preconditioner/ilu.cpp b/reference/test/preconditioner/ilu.cpp index 22c9929219e..5150626c898 100644 --- a/reference/test/preconditioner/ilu.cpp +++ b/reference/test/preconditioner/ilu.cpp @@ -614,8 +614,7 @@ TEST_F(DefaultIlu, CanBeUsedAsPreconditioner) { auto solver = gko::solver::Bicgstab<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .with_preconditioner(default_ilu_prec_type::build()) .on(this->exec) ->generate(this->mtx); @@ -635,8 +634,7 @@ TEST_F(DefaultIlu, CanBeUsedAsGeneratedPreconditioner) default_ilu_prec_type::build().on(this->exec)->generate(this->mtx); auto solver = gko::solver::Bicgstab<>::build() - .with_criteria( - gko::stop::Iteration::build().with_max_iters(2u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(2u)) .with_generated_preconditioner(precond) .on(this->exec) ->generate(this->mtx); diff --git a/reference/test/solver/gmres_kernels.cpp b/reference/test/solver/gmres_kernels.cpp index c718d60343b..4c651e7917b 100644 --- a/reference/test/solver/gmres_kernels.cpp +++ b/reference/test/solver/gmres_kernels.cpp @@ -373,15 +373,15 @@ TYPED_TEST(Gmres, KernelMultiAxpy) this->small_final_iter_nums.get_data()[1] = restart; this->small_krylov_bases = gko::initialize( // restart+1 x rows x #rhs { - I{1, 10}, // 0, 0, x - I{2, 11}, // 0, 1, x - I{3, 12}, // 0, 2, x - I{4, 13}, // 1, 0, x - I{5, 14}, // 1, 1, x - I{6, 15}, // 1, 2, x - I{nan, nan}, // 2, 0, x - I{nan, nan}, // 2, 1, x - I{nan, nan}, // 2, 2, x + I{1, 10}, // 0, 0, x + I{2, 11}, // 0, 1, x + I{3, 12}, // 0, 2, x + I{4, 13}, // 1, 0, x + I{5, 14}, // 1, 1, x + I{6, 15}, // 1, 2, x + I{nan, nan}, // 2, 0, x + I{nan, nan}, // 2, 1, x + I{nan, nan}, // 2, 2, x }, this->exec); this->small_stop.get_data()[0].stop(7, false); diff --git a/reference/test/solver/multigrid_kernels.cpp b/reference/test/solver/multigrid_kernels.cpp index 86be56ce3cb..c35db0b1427 100644 --- a/reference/test/solver/multigrid_kernels.cpp +++ b/reference/test/solver/multigrid_kernels.cpp @@ -405,8 +405,7 @@ class Multigrid : public ::testing::Test { gko::matrix::IdentityFactory::create(exec)) .with_post_uses_pre(false) .with_mid_case(mid_case) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .with_cycle(cycle) .with_min_coarse_rows(1u) .on(this->exec)); @@ -426,8 +425,7 @@ class Multigrid : public ::testing::Test { .with_coarsest_solver(this->lo_factory) .with_post_uses_pre(true) .with_mid_case(mid_case) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .with_cycle(cycle) .with_min_coarse_rows(1u) .on(this->exec)); @@ -1263,8 +1261,7 @@ TYPED_TEST(Multigrid, ZeroGuessIgnoresInput) .with_coarsest_solver(this->coarsest_factory) .with_max_levels(2u) .with_mg_level(this->coarse_factory) - .with_criteria( - gko::stop::Iteration::build().with_max_iters(1u)) + .with_criteria(gko::stop::Iteration::build().with_max_iters(1u)) .with_min_coarse_rows(1u); auto normal_mg = common_part .with_default_initial_guess( From 01c196f2528fa69dbb65be7ceea3cfbae1cdd13b Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 12 Jul 2023 16:28:48 +0000 
Subject: [PATCH 319/583] use advanced memory ordering instructions in CUDA --- .../{volatile.hpp.inc => memory.hpp.inc} | 0 common/cuda_hip/components/syncfree.hpp.inc | 43 +- cuda/components/memory.cuh | 789 ++++++++++++++++++ cuda/components/syncfree.cuh | 2 +- cuda/components/volatile.cuh | 58 -- cuda/solver/common_trs_kernels.cuh | 33 +- dev_tools/scripts/generate_cuda_memory_ptx.py | 192 +++++ .../{volatile.hip.hpp => memory.hip.hpp} | 70 +- hip/components/syncfree.hip.hpp | 2 +- 9 files changed, 1079 insertions(+), 110 deletions(-) rename common/cuda_hip/components/{volatile.hpp.inc => memory.hpp.inc} (100%) create mode 100644 cuda/components/memory.cuh delete mode 100644 cuda/components/volatile.cuh create mode 100755 dev_tools/scripts/generate_cuda_memory_ptx.py rename hip/components/{volatile.hip.hpp => memory.hip.hpp} (55%) diff --git a/common/cuda_hip/components/volatile.hpp.inc b/common/cuda_hip/components/memory.hpp.inc similarity index 100% rename from common/cuda_hip/components/volatile.hpp.inc rename to common/cuda_hip/components/memory.hpp.inc diff --git a/common/cuda_hip/components/syncfree.hpp.inc b/common/cuda_hip/components/syncfree.hpp.inc index 6b6dcc70f24..113c66d91ec 100644 --- a/common/cuda_hip/components/syncfree.hpp.inc +++ b/common/cuda_hip/components/syncfree.hpp.inc @@ -93,48 +93,31 @@ public: const auto dep_block = dependency / (block_size / subwarp_size); const auto dep_local = dependency % (block_size / subwarp_size); // assert(dependency < work_id); - if (dep_block == block_id) { - // wait for a local dependency - while (!load(local.status, dep_local)) { - __threadfence(); - } - } else { - // wait for a global dependency - while (!load(global.status, dependency)) { - __threadfence(); + if (get_lane() == 0) { + if (dep_block == block_id) { + // wait for a local dependency + while (!load_acquire_shared(local.status + dep_local)) { + } + } else { + // wait for a global dependency + while (!load_acquire(global.status + dependency)) { + } } } - __threadfence(); + group::tiled_partition(group::this_thread_block()).sync(); } - __device__ __forceinline__ bool peek(IndexType dependency) - { - const auto dep_block = dependency / (block_size / subwarp_size); - const auto dep_local = dependency % (block_size / subwarp_size); - // assert(dependency < work_id); - if (dep_block == block_id) { - // peek at a local dependency - auto finished = load(local.status, dep_local) != 0; - __threadfence(); - return finished; - } else { - // peek at a global dependency - auto finished = load(global.status, dependency); - __threadfence(); - return finished; - } - } + __device__ __forceinline__ bool peek(IndexType dependency) { return false; } __device__ __forceinline__ void mark_ready() { group::tiled_partition(group::this_thread_block()).sync(); - __threadfence(); if (get_lane() == 0) { const auto sh_id = get_work_id() % (block_size / subwarp_size); // notify local warps - store(local.status, sh_id, 1); + store_release_shared(local.status + sh_id, 1); // notify other blocks - store(global.status, get_work_id(), 1); + store_release(global.status + get_work_id(), 1); } } diff --git a/cuda/components/memory.cuh b/cuda/components/memory.cuh new file mode 100644 index 00000000000..578f7c8309f --- /dev/null +++ b/cuda/components/memory.cuh @@ -0,0 +1,789 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. 
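The syncfree scheduler change above replaces volatile polling plus __threadfence() with a single lane spinning on an acquire load and publishing completion with a release store. A reduced sketch of that handshake using the new wrappers, assuming compilation inside Ginkgo's CUDA backend; the kernel and variable names are illustrative:

    #include "cuda/components/memory.cuh"

    // One flag per work item, zero-initialized. The consumer spins until its
    // dependency is published; the producer publishes after finishing its work.
    __global__ void handshake_sketch(gko::int32* status, int my_work,
                                     int dependency)
    {
        using namespace gko::kernels::cuda;
        // consumer: the acquire load orders all following reads after the
        // producer's writes
        while (!load_acquire(status + dependency)) {
        }
        // ... consume results written by the dependency ...

        // producer: the release store makes preceding writes visible before
        // the flag can be observed
        store_release(status + my_work, 1);
    }
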
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CUDA_COMPONENTS_MEMORY_CUH_ +#define GKO_CUDA_COMPONENTS_MEMORY_CUH_ + + +#include + + +#include + + +#include "cuda/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { + + +__device__ __forceinline__ uint32 convert_generic_ptr_to_smem_ptr(void* ptr) +{ +// see +// https://github.com/NVIDIA/cutlass/blob/ +// 6fc5008803fe4e81b81a836fcd3a88258f4e5bbf/ +// include/cutlass/arch/memory_sm75.h#L90 +// for reasoning behind this implementation +#if (!defined(__clang__) && __CUDACC_VER_MAJOR__ >= 11) + return static_cast(__cvta_generic_to_shared(ptr)); +#elif (!defined(__clang__) && CUDACC_VER_MAJOR__ == 10 && \ + __CUDACC_VER_MINOR__ >= 2) + return __nvvm_get_smem_pointer(ptr); +#else + uint32 smem_ptr; + asm("{{ .reg .u64 smem_ptr; cvta.to.shared.u64 smem_ptr, %1; cvt.u32.u64 " + "%0, smem_ptr; }}" + : "=r"(smem_ptr) + : "l"(ptr)); + return smem_ptr; +#endif +} + + +__device__ __forceinline__ uint32 membar_acq_rel() +{ +#if __CUDA_ARCH__ < 700 + asm volatile("membar.gl;" ::: "memory"); +#else + asm volatile("fence.acq_rel.gpu;" ::: "memory"); +#endif +} + + +__device__ __forceinline__ uint32 membar_acq_rel_shared() +{ +#if __CUDA_ARCH__ < 700 + asm volatile("membar.cta;" ::: "memory"); +#else + asm volatile("fence.acq_rel.cta;" ::: "memory"); +#endif +} + + +#include "common/cuda_hip/components/memory.hpp.inc" + + +__device__ __forceinline__ int32 load_relaxed_shared(int32* ptr) +{ + int32 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.shared.b32 %0, [%1];" + : "=r"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.shared.b32 %0, [%1];" + : "=r"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed_shared(int32* ptr, int32 result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.shared.b32 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "r"(result) + : "memory"); +#else + asm 
volatile("st.relaxed.cta.shared.b32 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "r"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ int64 load_relaxed_shared(int64* ptr) +{ + int64 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.shared.b64 %0, [%1];" + : "=l"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.shared.b64 %0, [%1];" + : "=l"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed_shared(int64* ptr, int64 result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.shared.b64 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "l"(result) + : "memory"); +#else + asm volatile("st.relaxed.cta.shared.b64 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "l"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ float load_relaxed_shared(float* ptr) +{ + float result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.shared.f32 %0, [%1];" + : "=f"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.shared.f32 %0, [%1];" + : "=f"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed_shared(float* ptr, float result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.shared.f32 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "f"(result) + : "memory"); +#else + asm volatile("st.relaxed.cta.shared.f32 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "f"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ double load_relaxed_shared(double* ptr) +{ + double result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.shared.f64 %0, [%1];" + : "=d"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.shared.f64 %0, [%1];" + : "=d"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed_shared(double* ptr, double result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.shared.f64 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "d"(result) + : "memory"); +#else + asm volatile("st.relaxed.cta.shared.f64 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "d"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ int32 load_acquire_shared(int32* ptr) +{ + int32 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.shared.b32 %0, [%1];" + : "=r"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#else + asm volatile("ld.acquire.cta.shared.b32 %0, [%1];" + : "=r"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#endif + membar_acq_rel_shared(); + return result; +} + + +__device__ __forceinline__ void store_release_shared(int32* ptr, int32 result) +{ + membar_acq_rel_shared(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.shared.b32 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "r"(result) + : "memory"); +#else + asm volatile("st.release.cta.shared.b32 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "r"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ int64 load_acquire_shared(int64* ptr) +{ + int64 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.shared.b64 %0, [%1];" + : 
"=l"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#else + asm volatile("ld.acquire.cta.shared.b64 %0, [%1];" + : "=l"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#endif + membar_acq_rel_shared(); + return result; +} + + +__device__ __forceinline__ void store_release_shared(int64* ptr, int64 result) +{ + membar_acq_rel_shared(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.shared.b64 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "l"(result) + : "memory"); +#else + asm volatile("st.release.cta.shared.b64 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "l"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ float load_acquire_shared(float* ptr) +{ + float result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.shared.f32 %0, [%1];" + : "=f"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#else + asm volatile("ld.acquire.cta.shared.f32 %0, [%1];" + : "=f"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#endif + membar_acq_rel_shared(); + return result; +} + + +__device__ __forceinline__ void store_release_shared(float* ptr, float result) +{ + membar_acq_rel_shared(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.shared.f32 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "f"(result) + : "memory"); +#else + asm volatile("st.release.cta.shared.f32 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "f"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ double load_acquire_shared(double* ptr) +{ + double result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.shared.f64 %0, [%1];" + : "=d"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#else + asm volatile("ld.acquire.cta.shared.f64 %0, [%1];" + : "=d"(result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#endif + membar_acq_rel_shared(); + return result; +} + + +__device__ __forceinline__ void store_release_shared(double* ptr, double result) +{ + membar_acq_rel_shared(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.shared.f64 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "d"(result) + : "memory"); +#else + asm volatile("st.release.cta.shared.f64 [%0], %1;" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "d"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ int32 load_relaxed(int32* ptr) +{ + int32 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.b32 %0, [%1];" + : "=r"(result) + : "l"(ptr) + : "memory"); +#else + asm volatile("ld.relaxed.gpu.b32 %0, [%1];" + : "=r"(result) + : "l"(ptr) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed(int32* ptr, int32 result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.b32 [%0], %1;" ::"l"(ptr), "r"(result) + : "memory"); +#else + asm volatile("st.relaxed.gpu.b32 [%0], %1;" ::"l"(ptr), "r"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ int64 load_relaxed(int64* ptr) +{ + int64 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.b64 %0, [%1];" + : "=l"(result) + : "l"(ptr) + : "memory"); +#else + asm volatile("ld.relaxed.gpu.b64 %0, [%1];" + : "=l"(result) + : "l"(ptr) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed(int64* ptr, int64 result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.b64 [%0], %1;" ::"l"(ptr), "l"(result) + : "memory"); +#else + asm volatile("st.relaxed.gpu.b64 [%0], 
%1;" ::"l"(ptr), "l"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ float load_relaxed(float* ptr) +{ + float result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.f32 %0, [%1];" + : "=f"(result) + : "l"(ptr) + : "memory"); +#else + asm volatile("ld.relaxed.gpu.f32 %0, [%1];" + : "=f"(result) + : "l"(ptr) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed(float* ptr, float result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.f32 [%0], %1;" ::"l"(ptr), "f"(result) + : "memory"); +#else + asm volatile("st.relaxed.gpu.f32 [%0], %1;" ::"l"(ptr), "f"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ double load_relaxed(double* ptr) +{ + double result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.f64 %0, [%1];" + : "=d"(result) + : "l"(ptr) + : "memory"); +#else + asm volatile("ld.relaxed.gpu.f64 %0, [%1];" + : "=d"(result) + : "l"(ptr) + : "memory"); +#endif + + return result; +} + + +__device__ __forceinline__ void store_relaxed(double* ptr, double result) +{ +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.f64 [%0], %1;" ::"l"(ptr), "d"(result) + : "memory"); +#else + asm volatile("st.relaxed.gpu.f64 [%0], %1;" ::"l"(ptr), "d"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ int32 load_acquire(int32* ptr) +{ + int32 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.b32 %0, [%1];" + : "=r"(result) + : "l"(ptr) + : "memory"); +#else + asm volatile("ld.acquire.gpu.b32 %0, [%1];" + : "=r"(result) + : "l"(ptr) + : "memory"); +#endif + membar_acq_rel(); + return result; +} + + +__device__ __forceinline__ void store_release(int32* ptr, int32 result) +{ + membar_acq_rel(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.b32 [%0], %1;" ::"l"(ptr), "r"(result) + : "memory"); +#else + asm volatile("st.release.gpu.b32 [%0], %1;" ::"l"(ptr), "r"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ int64 load_acquire(int64* ptr) +{ + int64 result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.b64 %0, [%1];" + : "=l"(result) + : "l"(ptr) + : "memory"); +#else + asm volatile("ld.acquire.gpu.b64 %0, [%1];" + : "=l"(result) + : "l"(ptr) + : "memory"); +#endif + membar_acq_rel(); + return result; +} + + +__device__ __forceinline__ void store_release(int64* ptr, int64 result) +{ + membar_acq_rel(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.b64 [%0], %1;" ::"l"(ptr), "l"(result) + : "memory"); +#else + asm volatile("st.release.gpu.b64 [%0], %1;" ::"l"(ptr), "l"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ float load_acquire(float* ptr) +{ + float result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.f32 %0, [%1];" + : "=f"(result) + : "l"(ptr) + : "memory"); +#else + asm volatile("ld.acquire.gpu.f32 %0, [%1];" + : "=f"(result) + : "l"(ptr) + : "memory"); +#endif + membar_acq_rel(); + return result; +} + + +__device__ __forceinline__ void store_release(float* ptr, float result) +{ + membar_acq_rel(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.f32 [%0], %1;" ::"l"(ptr), "f"(result) + : "memory"); +#else + asm volatile("st.release.gpu.f32 [%0], %1;" ::"l"(ptr), "f"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ double load_acquire(double* ptr) +{ + double result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.f64 %0, [%1];" + : "=d"(result) + : "l"(ptr) + : "memory"); +#else + asm volatile("ld.acquire.gpu.f64 %0, [%1];" + : "=d"(result) + : "l"(ptr) + : "memory"); +#endif + 
membar_acq_rel(); + return result; +} + + +__device__ __forceinline__ void store_release(double* ptr, double result) +{ + membar_acq_rel(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.f64 [%0], %1;" ::"l"(ptr), "d"(result) + : "memory"); +#else + asm volatile("st.release.gpu.f64 [%0], %1;" ::"l"(ptr), "d"(result) + : "memory"); +#endif +} + + +__device__ __forceinline__ thrust::complex load_relaxed_shared( + thrust::complex* ptr) +{ + float real_result; + float imag_result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.shared.v2.f32 {%0, %1}, [%2];" + : "=f"(real_result), "=f"(imag_result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.shared.v2.f32 {%0, %1}, [%2];" + : "=f"(real_result), "=f"(imag_result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#endif + return thrust::complex{real_result, imag_result}; +} + + +__device__ __forceinline__ void store_relaxed_shared( + thrust::complex* ptr, thrust::complex result) +{ + auto real_result = result.real(); + auto imag_result = result.imag(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.shared.v2.f32 [%0], {%1, %2};" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "f"(real_result), "f"(imag_result) + : "memory"); +#else + asm volatile("st.relaxed.cta.shared.v2.f32 [%0], {%1, %2};" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "f"(real_result), "f"(imag_result) + : "memory"); +#endif +} + + +__device__ __forceinline__ thrust::complex load_relaxed_shared( + thrust::complex* ptr) +{ + double real_result; + double imag_result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.shared.v2.f64 {%0, %1}, [%2];" + : "=d"(real_result), "=d"(imag_result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#else + asm volatile("ld.relaxed.cta.shared.v2.f64 {%0, %1}, [%2];" + : "=d"(real_result), "=d"(imag_result) + : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "memory"); +#endif + return thrust::complex{real_result, imag_result}; +} + + +__device__ __forceinline__ void store_relaxed_shared( + thrust::complex* ptr, thrust::complex result) +{ + auto real_result = result.real(); + auto imag_result = result.imag(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.shared.v2.f64 [%0], {%1, %2};" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "d"(real_result), "d"(imag_result) + : "memory"); +#else + asm volatile("st.relaxed.cta.shared.v2.f64 [%0], {%1, %2};" ::"r"( + convert_generic_ptr_to_smem_ptr(ptr)), + "d"(real_result), "d"(imag_result) + : "memory"); +#endif +} + + +__device__ __forceinline__ thrust::complex load_relaxed( + thrust::complex* ptr) +{ + float real_result; + float imag_result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.v2.f32 {%0, %1}, [%2];" + : "=f"(real_result), "=f"(imag_result) + : "l"(ptr) + : "memory"); +#else + asm volatile("ld.relaxed.gpu.v2.f32 {%0, %1}, [%2];" + : "=f"(real_result), "=f"(imag_result) + : "l"(ptr) + : "memory"); +#endif + return thrust::complex{real_result, imag_result}; +} + + +__device__ __forceinline__ void store_relaxed(thrust::complex* ptr, + thrust::complex result) +{ + auto real_result = result.real(); + auto imag_result = result.imag(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.v2.f32 [%0], {%1, %2};" ::"l"(ptr), + "f"(real_result), "f"(imag_result) + : "memory"); +#else + asm volatile("st.relaxed.gpu.v2.f32 [%0], {%1, %2};" ::"l"(ptr), + "f"(real_result), "f"(imag_result) + : "memory"); +#endif +} + + +__device__ __forceinline__ thrust::complex load_relaxed( + 
thrust::complex* ptr) +{ + double real_result; + double imag_result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile.v2.f64 {%0, %1}, [%2];" + : "=d"(real_result), "=d"(imag_result) + : "l"(ptr) + : "memory"); +#else + asm volatile("ld.relaxed.gpu.v2.f64 {%0, %1}, [%2];" + : "=d"(real_result), "=d"(imag_result) + : "l"(ptr) + : "memory"); +#endif + return thrust::complex{real_result, imag_result}; +} + + +__device__ __forceinline__ void store_relaxed(thrust::complex* ptr, + thrust::complex result) +{ + auto real_result = result.real(); + auto imag_result = result.imag(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile.v2.f64 [%0], {%1, %2};" ::"l"(ptr), + "d"(real_result), "d"(imag_result) + : "memory"); +#else + asm volatile("st.relaxed.gpu.v2.f64 [%0], {%1, %2};" ::"l"(ptr), + "d"(real_result), "d"(imag_result) + : "memory"); +#endif +} + + +} // namespace cuda +} // namespace kernels +} // namespace gko + +#endif // GKO_CUDA_COMPONENTS_MEMORY_CUH_ diff --git a/cuda/components/syncfree.cuh b/cuda/components/syncfree.cuh index 625f1bd8359..d00064b06b7 100644 --- a/cuda/components/syncfree.cuh +++ b/cuda/components/syncfree.cuh @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/base/config.hpp" #include "cuda/components/atomic.cuh" #include "cuda/components/cooperative_groups.cuh" -#include "cuda/components/volatile.cuh" +#include "cuda/components/memory.cuh" namespace gko { diff --git a/cuda/components/volatile.cuh b/cuda/components/volatile.cuh deleted file mode 100644 index 96cb869c57e..00000000000 --- a/cuda/components/volatile.cuh +++ /dev/null @@ -1,58 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
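In C++ memory model terms, the relaxed, acquire and release wrappers above correspond to atomic accesses with the matching std::memory_order. A host-side analogy, purely illustrative and not part of the patch:

    #include <atomic>

    std::atomic<int> flag{0};
    int payload = 0;

    void producer()
    {
        payload = 42;                              // plain write
        flag.store(1, std::memory_order_release);  // ~ store_release
    }

    void consumer()
    {
        while (!flag.load(std::memory_order_acquire)) {  // ~ load_acquire
        }
        // the acquire/release pair guarantees payload reads 42 here
    }
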
-*************************************************************/ - -#ifndef GKO_CUDA_COMPONENTS_VOLATILE_CUH_ -#define GKO_CUDA_COMPONENTS_VOLATILE_CUH_ - - -#include - - -#include - - -#include "cuda/base/types.hpp" - - -namespace gko { -namespace kernels { -namespace cuda { - - -#include "common/cuda_hip/components/volatile.hpp.inc" - - -} // namespace cuda -} // namespace kernels -} // namespace gko - -#endif // GKO_CUDA_COMPONENTS_VOLATILE_CUH_ diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh index 6ee2c7521ff..546b366c6a2 100644 --- a/cuda/solver/common_trs_kernels.cuh +++ b/cuda/solver/common_trs_kernels.cuh @@ -55,9 +55,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/base/pointer_mode_guard.hpp" #include "cuda/base/types.hpp" #include "cuda/components/atomic.cuh" +#include "cuda/components/memory.cuh" #include "cuda/components/thread_ids.cuh" #include "cuda/components/uninitialized_array.hpp" -#include "cuda/components/volatile.cuh" namespace gko { @@ -426,30 +426,31 @@ __global__ void sptrsv_naive_caching_kernel( : dependency * nrhs + rhs; const bool shmem_possible = (dependency_gid / default_block_size) == self_shmem_id; + ValueType val{}; if (shmem_possible) { const auto dependency_shid = dependency_gid % default_block_size; - x_p = &x_s[dependency_shid]; - } - - ValueType x = *x_p; - while (is_nan(x)) { - x = load(x_p, 0); + while (is_nan(val = load_relaxed_shared(x_s + dependency_shid))) { + } + } else { + while ( + is_nan(val = load_relaxed(x + dependency * x_stride + rhs))) { + } } - sum += x * vals[i]; + sum += val * vals[i]; } // The first entry past the triangular part will be the diagonal const auto diag = unit_diag ? one() : vals[i]; const auto r = (b[row * b_stride + rhs] - sum) / diag; - store(x_s, self_shid, r); - x[row * x_stride + rhs] = r; + store_relaxed_shared(x_s + self_shid, r); + store_relaxed(x + row * x_stride + rhs, r); // This check to ensure no infinite loops happen. if (is_nan(r)) { - store(x_s, self_shid, zero()); - x[row * x_stride + rhs] = zero(); + store_relaxed(x_s + self_shid, zero()); + store_relaxed(x + row * x_stride + rhs, zero()); *nan_produced = true; } } @@ -488,12 +489,12 @@ __global__ void sptrsv_naive_legacy_kernel( auto j = row_begin; auto col = colidxs[j]; while (j != row_end) { - auto x_val = load(x, col * x_stride + rhs); + auto x_val = load_relaxed(x + col * x_stride + rhs); while (!is_nan(x_val)) { sum += vals[j] * x_val; j += row_step; col = colidxs[j]; - x_val = load(x, col * x_stride + rhs); + x_val = load_relaxed(x + col * x_stride + rhs); } // to avoid the kernel hanging on matrices without diagonal, // we bail out if we are past the triangle, even if it's not @@ -503,12 +504,12 @@ __global__ void sptrsv_naive_legacy_kernel( // assert(row == col); auto diag = unit_diag ? 
one() : vals[j]; const auto r = (b[row * b_stride + rhs] - sum) / diag; - store(x, row * x_stride + rhs, r); + store_relaxed(x + row * x_stride + rhs, r); // after we encountered the diagonal, we are done // this also skips entries outside the triangle j = row_end; if (is_nan(r)) { - store(x, row * x_stride + rhs, zero()); + store_relaxed(x + row * x_stride + rhs, zero()); *nan_produced = true; } } diff --git a/dev_tools/scripts/generate_cuda_memory_ptx.py b/dev_tools/scripts/generate_cuda_memory_ptx.py new file mode 100755 index 00000000000..a03cb47f4e7 --- /dev/null +++ b/dev_tools/scripts/generate_cuda_memory_ptx.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 +import os +memory_spaces = [(".shared", ".cta", "_shared", "convert_generic_ptr_to_smem_ptr(ptr)", "r"), ("", ".gpu", "", "ptr", "l")] +memory_orderings = [ + (".relaxed", "_relaxed", ".relaxed", "_relaxed", True), + (".acquire", "_acquire", ".release", "_release", False) + ] +sizes=[(".b32", "r", "int32", 4), (".b64", "l", "int64", 8), (".f32", "f", "float", 4), (".f64", "d", "double", 8)] +# header +print("""/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CUDA_COMPONENTS_MEMORY_CUH_ +#define GKO_CUDA_COMPONENTS_MEMORY_CUH_ + + +#include + + +#include + + +#include "cuda/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { + + +__device__ __forceinline__ uint32 convert_generic_ptr_to_smem_ptr(void* ptr) +{ +// see +// https://github.com/NVIDIA/cutlass/blob/ +// 6fc5008803fe4e81b81a836fcd3a88258f4e5bbf/ +// include/cutlass/arch/memory_sm75.h#L90 +// for reasoning behind this implementation +#if (!defined(__clang__) && __CUDACC_VER_MAJOR__ >= 11) + return static_cast(__cvta_generic_to_shared(ptr)); +#elif (!defined(__clang__) && CUDACC_VER_MAJOR__ == 10 && \ + __CUDACC_VER_MINOR__ >= 2) + return __nvvm_get_smem_pointer(ptr); +#else + uint32 smem_ptr; + asm("{{ .reg .u64 smem_ptr; cvta.to.shared.u64 smem_ptr, %1; cvt.u32.u64 " + "%0, smem_ptr; }}" + : "=r"(smem_ptr) + : "l"(ptr)); + return smem_ptr; +#endif +} + + +__device__ __forceinline__ uint32 membar_acq_rel() +{ +#if __CUDA_ARCH__ < 700 + asm volatile("membar.gl;" ::: "memory"); +#else + asm volatile("fence.acq_rel.gpu;" ::: "memory"); +#endif +} + + +__device__ __forceinline__ uint32 membar_acq_rel_shared() +{ +#if __CUDA_ARCH__ < 700 + asm volatile("membar.cta;" ::: "memory"); +#else + asm volatile("fence.acq_rel.cta;" ::: "memory"); +#endif +} + + +#include "common/cuda_hip/components/memory.hpp.inc" +""") + +# relaxed +for memory_space_suffix, scope_suffix, function_memory_space_suffix, ptr_name, ptr_constraint in memory_spaces: + for volta_load_ordering_suffix, load_function_ordering_suffix, volta_store_ordering_suffix, store_function_ordering_suffix, is_relaxed in memory_orderings: + for size_suffix, constraint, typename, size in sizes: + membar_expression = "" if is_relaxed else f"membar_acq_rel{function_memory_space_suffix}();" + print(f""" +__device__ __forceinline__ {typename} load{load_function_ordering_suffix}{function_memory_space_suffix}({typename}* ptr) +{{ + {typename} result; +#if __CUDA_ARCH__ < 700 + asm volatile("ld.volatile{memory_space_suffix}{size_suffix} %0, [%1];" + : "={constraint}"(result) + : "{ptr_constraint}"({ptr_name}) + : "memory"); +#else + asm volatile("ld{volta_load_ordering_suffix}{scope_suffix}{memory_space_suffix}{size_suffix} %0, [%1];" + : "={constraint}"(result) + : "{ptr_constraint}"({ptr_name}) + : "memory"); +#endif + {membar_expression} + return result; +}} + + +__device__ __forceinline__ void store{store_function_ordering_suffix}{function_memory_space_suffix}({typename}* ptr, {typename} result) +{{ + {membar_expression} +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile{memory_space_suffix}{size_suffix} [%0], %1;" + :: "{ptr_constraint}"({ptr_name}), "{constraint}"(result) + : "memory"); +#else + asm volatile("st{volta_store_ordering_suffix}{scope_suffix}{memory_space_suffix}{size_suffix} [%0], %1;" + :: "{ptr_constraint}"({ptr_name}), "{constraint}"(result) + : "memory"); +#endif +}} +""") + +# vectorized relaxed loads for thrust::complex +sizes=[(".f32", "f", "float", 4), (".f64", "d", "double", 8)] +for memory_space_suffix, scope_suffix, function_memory_space_suffix, ptr_name, ptr_constraint in memory_spaces: + for size_suffix, constraint, typename, size in sizes: + print(f""" +__device__ __forceinline__ thrust::complex<{typename}> load_relaxed{function_memory_space_suffix}(thrust::complex<{typename}>* ptr) +{{ + {typename} real_result; + {typename} imag_result; +#if __CUDA_ARCH__ < 700 + asm 
volatile("ld.volatile{memory_space_suffix}.v2{size_suffix} {{%0, %1}}, [%2];" + : "={constraint}"(real_result), "={constraint}"(imag_result) + : "{ptr_constraint}"({ptr_name}) + : "memory"); +#else + asm volatile("ld.relaxed{scope_suffix}{memory_space_suffix}.v2{size_suffix} {{%0, %1}}, [%2];" + : "={constraint}"(real_result), "={constraint}"(imag_result) + : "{ptr_constraint}"({ptr_name}) + : "memory"); +#endif + return thrust::complex<{typename}>{{real_result, imag_result}}; +}} + + +__device__ __forceinline__ void store_relaxed{function_memory_space_suffix}(thrust::complex<{typename}>* ptr, thrust::complex<{typename}> result) +{{ + auto real_result = result.real(); + auto imag_result = result.imag(); +#if __CUDA_ARCH__ < 700 + asm volatile("st.volatile{memory_space_suffix}.v2{size_suffix} [%0], {{%1, %2}};" + :: "{ptr_constraint}"({ptr_name}), "{constraint}"(real_result), "{constraint}"(imag_result) + : "memory"); +#else + asm volatile("st.relaxed{scope_suffix}{memory_space_suffix}.v2{size_suffix} [%0], {{%1, %2}};" + :: "{ptr_constraint}"({ptr_name}), "{constraint}"(real_result), "{constraint}"(imag_result) + : "memory"); +#endif +}} +""") + +print(""" +} // namespace cuda +} // namespace kernels +} // namespace gko + +#endif // GKO_CUDA_COMPONENTS_MEMORY_CUH_ +""") \ No newline at end of file diff --git a/hip/components/volatile.hip.hpp b/hip/components/memory.hip.hpp similarity index 55% rename from hip/components/volatile.hip.hpp rename to hip/components/memory.hip.hpp index de0202d8391..b424c8bbc06 100644 --- a/hip/components/volatile.hip.hpp +++ b/hip/components/memory.hip.hpp @@ -30,8 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#ifndef GKO_HIP_COMPONENTS_VOLATILE_HIP_HPP_ -#define GKO_HIP_COMPONENTS_VOLATILE_HIP_HPP_ +#ifndef GKO_HIP_COMPONENTS_MEMORY_HIP_HPP_ +#define GKO_HIP_COMPONENTS_MEMORY_HIP_HPP_ #include @@ -48,11 +48,73 @@ namespace kernels { namespace hip { -#include "common/cuda_hip/components/volatile.hpp.inc" +#include "common/cuda_hip/components/memory.hpp.inc" + + +template +__device__ __forceinline__ ValueType load_relaxed(ValueType* ptr) +{ + return load(ptr, 0); +} + + +template +__device__ __forceinline__ ValueType load_acquire(ValueType* ptr) +{ + auto result = load(ptr, 0); + __threadfence(); + return result; +} + +template +__device__ __forceinline__ void store_relaxed(ValueType* ptr, ValueType value) +{ + store(ptr, 0, value); +} + + +template +__device__ __forceinline__ void store_release(ValueType* ptr, ValueType value) +{ + __threadfence(); + store(ptr, 0, value); +} + + +template +__device__ __forceinline__ ValueType load_relaxed_shared(ValueType* ptr) +{ + return load(ptr, 0); +} + + +template +__device__ __forceinline__ ValueType load_acquire_shared(ValueType* ptr) +{ + auto result = load(ptr, 0); + __threadfence(); + return result; +} + +template +__device__ __forceinline__ void store_relaxed_shared(ValueType* ptr, + ValueType value) +{ + store(ptr, 0, value); +} + + +template +__device__ __forceinline__ void store_release_shared(ValueType* ptr, + ValueType value) +{ + __threadfence(); + store(ptr, 0, value); +} } // namespace hip } // namespace kernels } // namespace gko -#endif // GKO_HIP_COMPONENTS_VOLATILE_HIP_HPP_ +#endif // GKO_HIP_COMPONENTS_MEMORY_HIP_HPP_ diff --git a/hip/components/syncfree.hip.hpp b/hip/components/syncfree.hip.hpp index 232ff059585..528a9200d08 100644 --- 
a/hip/components/syncfree.hip.hpp +++ b/hip/components/syncfree.hip.hpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hip/base/config.hip.hpp" #include "hip/components/atomic.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" -#include "hip/components/volatile.hip.hpp" +#include "hip/components/memory.hip.hpp" namespace gko { From 533ba1c3f1196d9c37f466e28038821d15b44b03 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 22 Sep 2023 22:36:29 +0200 Subject: [PATCH 320/583] review updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - const-correctness - add doc to generic-to-shared ptr conversion - improve generation script readability Co-authored-by: Marcel Koch Co-authored-by: Thomas Grützmacher --- cuda/components/memory.cuh | 207 +++++++++--------- dev_tools/scripts/generate_cuda_memory_ptx.py | 127 +++++++---- hip/components/memory.hip.hpp | 8 +- 3 files changed, 196 insertions(+), 146 deletions(-) diff --git a/cuda/components/memory.cuh b/cuda/components/memory.cuh index 578f7c8309f..15f2541bddf 100644 --- a/cuda/components/memory.cuh +++ b/cuda/components/memory.cuh @@ -48,6 +48,13 @@ namespace kernels { namespace cuda { +/** + * Transforms a generic CUDA pointer pointing to shared memory to a + * shared memory pointer for use in PTX assembly. + * CUDA PTX assembly uses 32bit pointers for shared memory addressing. + * The result is undefined for a generic pointer pointing to anything but + * shared memory. + */ __device__ __forceinline__ uint32 convert_generic_ptr_to_smem_ptr(void* ptr) { // see @@ -94,18 +101,18 @@ __device__ __forceinline__ uint32 membar_acq_rel_shared() #include "common/cuda_hip/components/memory.hpp.inc" -__device__ __forceinline__ int32 load_relaxed_shared(int32* ptr) +__device__ __forceinline__ int32 load_relaxed_shared(const int32* ptr) { int32 result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.b32 %0, [%1];" : "=r"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #else asm volatile("ld.relaxed.cta.shared.b32 %0, [%1];" : "=r"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #endif @@ -117,30 +124,30 @@ __device__ __forceinline__ void store_relaxed_shared(int32* ptr, int32 result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "r"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "r"(result) : "memory"); #endif } -__device__ __forceinline__ int64 load_relaxed_shared(int64* ptr) +__device__ __forceinline__ int64 load_relaxed_shared(const int64* ptr) { int64 result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.b64 %0, [%1];" : "=l"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #else asm volatile("ld.relaxed.cta.shared.b64 %0, [%1];" : "=l"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #endif @@ -152,30 +159,30 @@ __device__ __forceinline__ void store_relaxed_shared(int64* ptr, int64 result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b64 [%0], %1;" ::"r"( - 
convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "l"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "l"(result) : "memory"); #endif } -__device__ __forceinline__ float load_relaxed_shared(float* ptr) +__device__ __forceinline__ float load_relaxed_shared(const float* ptr) { float result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.f32 %0, [%1];" : "=f"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #else asm volatile("ld.relaxed.cta.shared.f32 %0, [%1];" : "=f"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #endif @@ -187,30 +194,30 @@ __device__ __forceinline__ void store_relaxed_shared(float* ptr, float result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "f"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "f"(result) : "memory"); #endif } -__device__ __forceinline__ double load_relaxed_shared(double* ptr) +__device__ __forceinline__ double load_relaxed_shared(const double* ptr) { double result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.f64 %0, [%1];" : "=d"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #else asm volatile("ld.relaxed.cta.shared.f64 %0, [%1];" : "=d"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #endif @@ -222,30 +229,30 @@ __device__ __forceinline__ void store_relaxed_shared(double* ptr, double result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "d"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "d"(result) : "memory"); #endif } -__device__ __forceinline__ int32 load_acquire_shared(int32* ptr) +__device__ __forceinline__ int32 load_acquire_shared(const int32* ptr) { int32 result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.b32 %0, [%1];" : "=r"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #else asm volatile("ld.acquire.cta.shared.b32 %0, [%1];" : "=r"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #endif membar_acq_rel_shared(); @@ -258,30 +265,30 @@ __device__ __forceinline__ void store_release_shared(int32* ptr, int32 result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "r"(result) : "memory"); #else asm volatile("st.release.cta.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "r"(result) : "memory"); #endif } -__device__ __forceinline__ int64 load_acquire_shared(int64* ptr) +__device__ __forceinline__ int64 
load_acquire_shared(const int64* ptr) { int64 result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.b64 %0, [%1];" : "=l"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #else asm volatile("ld.acquire.cta.shared.b64 %0, [%1];" : "=l"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #endif membar_acq_rel_shared(); @@ -294,30 +301,30 @@ __device__ __forceinline__ void store_release_shared(int64* ptr, int64 result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "l"(result) : "memory"); #else asm volatile("st.release.cta.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "l"(result) : "memory"); #endif } -__device__ __forceinline__ float load_acquire_shared(float* ptr) +__device__ __forceinline__ float load_acquire_shared(const float* ptr) { float result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.f32 %0, [%1];" : "=f"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #else asm volatile("ld.acquire.cta.shared.f32 %0, [%1];" : "=f"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #endif membar_acq_rel_shared(); @@ -330,30 +337,30 @@ __device__ __forceinline__ void store_release_shared(float* ptr, float result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "f"(result) : "memory"); #else asm volatile("st.release.cta.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "f"(result) : "memory"); #endif } -__device__ __forceinline__ double load_acquire_shared(double* ptr) +__device__ __forceinline__ double load_acquire_shared(const double* ptr) { double result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.f64 %0, [%1];" : "=d"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #else asm volatile("ld.acquire.cta.shared.f64 %0, [%1];" : "=d"(result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #endif membar_acq_rel_shared(); @@ -366,30 +373,30 @@ __device__ __forceinline__ void store_release_shared(double* ptr, double result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "d"(result) : "memory"); #else asm volatile("st.release.cta.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "d"(result) : "memory"); #endif } -__device__ __forceinline__ int32 load_relaxed(int32* ptr) +__device__ __forceinline__ int32 load_relaxed(const int32* ptr) { int32 result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.b32 %0, [%1];" : "=r"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #else asm volatile("ld.relaxed.gpu.b32 %0, [%1];" : "=r"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #endif @@ -400,27 +407,27 @@ __device__ 
__forceinline__ int32 load_relaxed(int32* ptr) __device__ __forceinline__ void store_relaxed(int32* ptr, int32 result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b32 [%0], %1;" ::"l"(ptr), "r"(result) + asm volatile("st.volatile.b32 [%0], %1;" ::"l"((void*)ptr), "r"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.b32 [%0], %1;" ::"l"(ptr), "r"(result) + asm volatile("st.relaxed.gpu.b32 [%0], %1;" ::"l"((void*)ptr), "r"(result) : "memory"); #endif } -__device__ __forceinline__ int64 load_relaxed(int64* ptr) +__device__ __forceinline__ int64 load_relaxed(const int64* ptr) { int64 result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.b64 %0, [%1];" : "=l"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #else asm volatile("ld.relaxed.gpu.b64 %0, [%1];" : "=l"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #endif @@ -431,27 +438,27 @@ __device__ __forceinline__ int64 load_relaxed(int64* ptr) __device__ __forceinline__ void store_relaxed(int64* ptr, int64 result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b64 [%0], %1;" ::"l"(ptr), "l"(result) + asm volatile("st.volatile.b64 [%0], %1;" ::"l"((void*)ptr), "l"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.b64 [%0], %1;" ::"l"(ptr), "l"(result) + asm volatile("st.relaxed.gpu.b64 [%0], %1;" ::"l"((void*)ptr), "l"(result) : "memory"); #endif } -__device__ __forceinline__ float load_relaxed(float* ptr) +__device__ __forceinline__ float load_relaxed(const float* ptr) { float result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.f32 %0, [%1];" : "=f"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #else asm volatile("ld.relaxed.gpu.f32 %0, [%1];" : "=f"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #endif @@ -462,27 +469,27 @@ __device__ __forceinline__ float load_relaxed(float* ptr) __device__ __forceinline__ void store_relaxed(float* ptr, float result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f32 [%0], %1;" ::"l"(ptr), "f"(result) + asm volatile("st.volatile.f32 [%0], %1;" ::"l"((void*)ptr), "f"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.f32 [%0], %1;" ::"l"(ptr), "f"(result) + asm volatile("st.relaxed.gpu.f32 [%0], %1;" ::"l"((void*)ptr), "f"(result) : "memory"); #endif } -__device__ __forceinline__ double load_relaxed(double* ptr) +__device__ __forceinline__ double load_relaxed(const double* ptr) { double result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.f64 %0, [%1];" : "=d"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #else asm volatile("ld.relaxed.gpu.f64 %0, [%1];" : "=d"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #endif @@ -493,27 +500,27 @@ __device__ __forceinline__ double load_relaxed(double* ptr) __device__ __forceinline__ void store_relaxed(double* ptr, double result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f64 [%0], %1;" ::"l"(ptr), "d"(result) + asm volatile("st.volatile.f64 [%0], %1;" ::"l"((void*)ptr), "d"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.f64 [%0], %1;" ::"l"(ptr), "d"(result) + asm volatile("st.relaxed.gpu.f64 [%0], %1;" ::"l"((void*)ptr), "d"(result) : "memory"); #endif } -__device__ __forceinline__ int32 load_acquire(int32* ptr) +__device__ __forceinline__ int32 load_acquire(const int32* ptr) { int32 result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.b32 %0, [%1];" : "=r"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #else asm volatile("ld.acquire.gpu.b32 %0, [%1];" : "=r"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #endif 
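    // annotation (not part of the original patch): on the pre-Volta path the
    // volatile load above carries no ordering guarantees of its own; the
    // membar_acq_rel() call that follows is what gives load_acquire its
    // acquire semantics there.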
membar_acq_rel(); @@ -525,27 +532,27 @@ __device__ __forceinline__ void store_release(int32* ptr, int32 result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b32 [%0], %1;" ::"l"(ptr), "r"(result) + asm volatile("st.volatile.b32 [%0], %1;" ::"l"((void*)ptr), "r"(result) : "memory"); #else - asm volatile("st.release.gpu.b32 [%0], %1;" ::"l"(ptr), "r"(result) + asm volatile("st.release.gpu.b32 [%0], %1;" ::"l"((void*)ptr), "r"(result) : "memory"); #endif } -__device__ __forceinline__ int64 load_acquire(int64* ptr) +__device__ __forceinline__ int64 load_acquire(const int64* ptr) { int64 result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.b64 %0, [%1];" : "=l"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #else asm volatile("ld.acquire.gpu.b64 %0, [%1];" : "=l"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #endif membar_acq_rel(); @@ -557,27 +564,27 @@ __device__ __forceinline__ void store_release(int64* ptr, int64 result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b64 [%0], %1;" ::"l"(ptr), "l"(result) + asm volatile("st.volatile.b64 [%0], %1;" ::"l"((void*)ptr), "l"(result) : "memory"); #else - asm volatile("st.release.gpu.b64 [%0], %1;" ::"l"(ptr), "l"(result) + asm volatile("st.release.gpu.b64 [%0], %1;" ::"l"((void*)ptr), "l"(result) : "memory"); #endif } -__device__ __forceinline__ float load_acquire(float* ptr) +__device__ __forceinline__ float load_acquire(const float* ptr) { float result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.f32 %0, [%1];" : "=f"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #else asm volatile("ld.acquire.gpu.f32 %0, [%1];" : "=f"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #endif membar_acq_rel(); @@ -589,27 +596,27 @@ __device__ __forceinline__ void store_release(float* ptr, float result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f32 [%0], %1;" ::"l"(ptr), "f"(result) + asm volatile("st.volatile.f32 [%0], %1;" ::"l"((void*)ptr), "f"(result) : "memory"); #else - asm volatile("st.release.gpu.f32 [%0], %1;" ::"l"(ptr), "f"(result) + asm volatile("st.release.gpu.f32 [%0], %1;" ::"l"((void*)ptr), "f"(result) : "memory"); #endif } -__device__ __forceinline__ double load_acquire(double* ptr) +__device__ __forceinline__ double load_acquire(const double* ptr) { double result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.f64 %0, [%1];" : "=d"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #else asm volatile("ld.acquire.gpu.f64 %0, [%1];" : "=d"(result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #endif membar_acq_rel(); @@ -621,29 +628,29 @@ __device__ __forceinline__ void store_release(double* ptr, double result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f64 [%0], %1;" ::"l"(ptr), "d"(result) + asm volatile("st.volatile.f64 [%0], %1;" ::"l"((void*)ptr), "d"(result) : "memory"); #else - asm volatile("st.release.gpu.f64 [%0], %1;" ::"l"(ptr), "d"(result) + asm volatile("st.release.gpu.f64 [%0], %1;" ::"l"((void*)ptr), "d"(result) : "memory"); #endif } __device__ __forceinline__ thrust::complex load_relaxed_shared( - thrust::complex* ptr) + const thrust::complex* ptr) { float real_result; float imag_result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.v2.f32 {%0, %1}, [%2];" : "=f"(real_result), "=f"(imag_result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #else asm volatile("ld.relaxed.cta.shared.v2.f32 {%0, %1}, [%2];" 
: "=f"(real_result), "=f"(imag_result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #endif return thrust::complex{real_result, imag_result}; @@ -657,12 +664,12 @@ __device__ __forceinline__ void store_relaxed_shared( auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.v2.f32 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "f"(real_result), "f"(imag_result) : "memory"); #else asm volatile("st.relaxed.cta.shared.v2.f32 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "f"(real_result), "f"(imag_result) : "memory"); #endif @@ -670,19 +677,19 @@ __device__ __forceinline__ void store_relaxed_shared( __device__ __forceinline__ thrust::complex load_relaxed_shared( - thrust::complex* ptr) + const thrust::complex* ptr) { double real_result; double imag_result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.v2.f64 {%0, %1}, [%2];" : "=d"(real_result), "=d"(imag_result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #else asm volatile("ld.relaxed.cta.shared.v2.f64 {%0, %1}, [%2];" : "=d"(real_result), "=d"(imag_result) - : "r"(convert_generic_ptr_to_smem_ptr(ptr)) + : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) : "memory"); #endif return thrust::complex{real_result, imag_result}; @@ -696,12 +703,12 @@ __device__ __forceinline__ void store_relaxed_shared( auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.v2.f64 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "d"(real_result), "d"(imag_result) : "memory"); #else asm volatile("st.relaxed.cta.shared.v2.f64 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr(ptr)), + convert_generic_ptr_to_smem_ptr((void*)ptr)), "d"(real_result), "d"(imag_result) : "memory"); #endif @@ -709,19 +716,19 @@ __device__ __forceinline__ void store_relaxed_shared( __device__ __forceinline__ thrust::complex load_relaxed( - thrust::complex* ptr) + const thrust::complex* ptr) { float real_result; float imag_result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.v2.f32 {%0, %1}, [%2];" : "=f"(real_result), "=f"(imag_result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #else asm volatile("ld.relaxed.gpu.v2.f32 {%0, %1}, [%2];" : "=f"(real_result), "=f"(imag_result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #endif return thrust::complex{real_result, imag_result}; @@ -734,11 +741,11 @@ __device__ __forceinline__ void store_relaxed(thrust::complex* ptr, auto real_result = result.real(); auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.v2.f32 [%0], {%1, %2};" ::"l"(ptr), + asm volatile("st.volatile.v2.f32 [%0], {%1, %2};" ::"l"((void*)ptr), "f"(real_result), "f"(imag_result) : "memory"); #else - asm volatile("st.relaxed.gpu.v2.f32 [%0], {%1, %2};" ::"l"(ptr), + asm volatile("st.relaxed.gpu.v2.f32 [%0], {%1, %2};" ::"l"((void*)ptr), "f"(real_result), "f"(imag_result) : "memory"); #endif @@ -746,19 +753,19 @@ __device__ __forceinline__ void store_relaxed(thrust::complex* ptr, __device__ __forceinline__ thrust::complex load_relaxed( - thrust::complex* ptr) + const thrust::complex* ptr) { double real_result; double imag_result; #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.v2.f64 {%0, %1}, [%2];" : "=d"(real_result), 
"=d"(imag_result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #else asm volatile("ld.relaxed.gpu.v2.f64 {%0, %1}, [%2];" : "=d"(real_result), "=d"(imag_result) - : "l"(ptr) + : "l"((void*)ptr) : "memory"); #endif return thrust::complex{real_result, imag_result}; @@ -771,11 +778,11 @@ __device__ __forceinline__ void store_relaxed(thrust::complex* ptr, auto real_result = result.real(); auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.v2.f64 [%0], {%1, %2};" ::"l"(ptr), + asm volatile("st.volatile.v2.f64 [%0], {%1, %2};" ::"l"((void*)ptr), "d"(real_result), "d"(imag_result) : "memory"); #else - asm volatile("st.relaxed.gpu.v2.f64 [%0], {%1, %2};" ::"l"(ptr), + asm volatile("st.relaxed.gpu.v2.f64 [%0], {%1, %2};" ::"l"((void*)ptr), "d"(real_result), "d"(imag_result) : "memory"); #endif diff --git a/dev_tools/scripts/generate_cuda_memory_ptx.py b/dev_tools/scripts/generate_cuda_memory_ptx.py index a03cb47f4e7..dd5d682a9b8 100755 --- a/dev_tools/scripts/generate_cuda_memory_ptx.py +++ b/dev_tools/scripts/generate_cuda_memory_ptx.py @@ -1,11 +1,46 @@ #!/usr/bin/env python3 -import os -memory_spaces = [(".shared", ".cta", "_shared", "convert_generic_ptr_to_smem_ptr(ptr)", "r"), ("", ".gpu", "", "ptr", "l")] +import dataclasses + + +@dataclasses.dataclass +class space: + ptx_space_suffix: str + ptx_scope_suffix: str + fn_suffix: str + ptr_expr: str + ptr_constraint: str + + +@dataclasses.dataclass +class ordering: + ptx_load_suffix: str + fn_load_suffix: str + ptx_store_suffix: str + fn_store_suffix: str + is_relaxed: bool + + +@dataclasses.dataclass +class type_desc: + ptx_type_suffix: str + val_constraint: str + name: str + + +memory_spaces = [ + space(ptx_space_suffix=".shared", ptx_scope_suffix=".cta", fn_suffix="_shared", + ptr_expr="convert_generic_ptr_to_smem_ptr((void*)ptr)", ptr_constraint="r"), + space(ptx_space_suffix="", ptx_scope_suffix=".gpu", fn_suffix="", ptr_expr="(void*)ptr", ptr_constraint="l")] memory_orderings = [ - (".relaxed", "_relaxed", ".relaxed", "_relaxed", True), - (".acquire", "_acquire", ".release", "_release", False) - ] -sizes=[(".b32", "r", "int32", 4), (".b64", "l", "int64", 8), (".f32", "f", "float", 4), (".f64", "d", "double", 8)] + ordering(ptx_load_suffix=".relaxed", fn_load_suffix="_relaxed", + ptx_store_suffix=".relaxed", fn_store_suffix="_relaxed", is_relaxed=True), + ordering(ptx_load_suffix=".acquire", fn_load_suffix="_acquire", + ptx_store_suffix=".release", fn_store_suffix="_release", is_relaxed=False) +] +types = [type_desc(ptx_type_suffix=".b32", val_constraint="r", name="int32"), + type_desc(ptx_type_suffix=".b64", val_constraint="l", name="int64"), + type_desc(ptx_type_suffix=".f32", val_constraint="f", name="float"), + type_desc(ptx_type_suffix=".f64", val_constraint="d", name="double")] # header print("""/************************************************************* Copyright (c) 2017-2023, the Ginkgo authors @@ -57,6 +92,13 @@ namespace cuda { +/** + * Transforms a generic CUDA pointer pointing to shared memory to a + * shared memory pointer for use in PTX assembly. + * CUDA PTX assembly uses 32bit pointers for shared memory addressing. + * The result is undefined for a generic pointer pointing to anything but + * shared memory. 
+ */ __device__ __forceinline__ uint32 convert_generic_ptr_to_smem_ptr(void* ptr) { // see @@ -104,23 +146,23 @@ """) # relaxed -for memory_space_suffix, scope_suffix, function_memory_space_suffix, ptr_name, ptr_constraint in memory_spaces: - for volta_load_ordering_suffix, load_function_ordering_suffix, volta_store_ordering_suffix, store_function_ordering_suffix, is_relaxed in memory_orderings: - for size_suffix, constraint, typename, size in sizes: - membar_expression = "" if is_relaxed else f"membar_acq_rel{function_memory_space_suffix}();" +for s in memory_spaces: + for o in memory_orderings: + for t in types: + membar_expression = "" if o.is_relaxed else f"membar_acq_rel{s.fn_suffix}();" print(f""" -__device__ __forceinline__ {typename} load{load_function_ordering_suffix}{function_memory_space_suffix}({typename}* ptr) +__device__ __forceinline__ {t.name} load{o.fn_load_suffix}{s.fn_suffix}(const {t.name}* ptr) {{ - {typename} result; + {t.name} result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile{memory_space_suffix}{size_suffix} %0, [%1];" - : "={constraint}"(result) - : "{ptr_constraint}"({ptr_name}) + asm volatile("ld.volatile{s.ptx_space_suffix}{t.ptx_type_suffix} %0, [%1];" + : "={t.val_constraint}"(result) + : "{s.ptr_constraint}"({s.ptr_expr}) : "memory"); #else - asm volatile("ld{volta_load_ordering_suffix}{scope_suffix}{memory_space_suffix}{size_suffix} %0, [%1];" - : "={constraint}"(result) - : "{ptr_constraint}"({ptr_name}) + asm volatile("ld{o.ptx_load_suffix}{s.ptx_scope_suffix}{s.ptx_space_suffix}{t.ptx_type_suffix} %0, [%1];" + : "={t.val_constraint}"(result) + : "{s.ptr_constraint}"({s.ptr_expr}) : "memory"); #endif {membar_expression} @@ -128,56 +170,57 @@ }} -__device__ __forceinline__ void store{store_function_ordering_suffix}{function_memory_space_suffix}({typename}* ptr, {typename} result) +__device__ __forceinline__ void store{o.fn_store_suffix}{s.fn_suffix}({t.name}* ptr, {t.name} result) {{ {membar_expression} #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile{memory_space_suffix}{size_suffix} [%0], %1;" - :: "{ptr_constraint}"({ptr_name}), "{constraint}"(result) + asm volatile("st.volatile{s.ptx_space_suffix}{t.ptx_type_suffix} [%0], %1;" + :: "{s.ptr_constraint}"({s.ptr_expr}), "{t.val_constraint}"(result) : "memory"); #else - asm volatile("st{volta_store_ordering_suffix}{scope_suffix}{memory_space_suffix}{size_suffix} [%0], %1;" - :: "{ptr_constraint}"({ptr_name}), "{constraint}"(result) + asm volatile("st{o.ptx_store_suffix}{s.ptx_scope_suffix}{s.ptx_space_suffix}{t.ptx_type_suffix} [%0], %1;" + :: "{s.ptr_constraint}"({s.ptr_expr}), "{t.val_constraint}"(result) : "memory"); #endif }} """) # vectorized relaxed loads for thrust::complex -sizes=[(".f32", "f", "float", 4), (".f64", "d", "double", 8)] -for memory_space_suffix, scope_suffix, function_memory_space_suffix, ptr_name, ptr_constraint in memory_spaces: - for size_suffix, constraint, typename, size in sizes: +types = [type_desc(ptx_type_suffix=".f32", val_constraint="f", name="float"), + type_desc(ptx_type_suffix=".f64", val_constraint="d", name="double")] +for s in memory_spaces: + for t in types: print(f""" -__device__ __forceinline__ thrust::complex<{typename}> load_relaxed{function_memory_space_suffix}(thrust::complex<{typename}>* ptr) +__device__ __forceinline__ thrust::complex<{t.name}> load_relaxed{s.fn_suffix}(const thrust::complex<{t.name}>* ptr) {{ - {typename} real_result; - {typename} imag_result; + {t.name} real_result; + {t.name} imag_result; #if __CUDA_ARCH__ < 700 - asm 
volatile("ld.volatile{memory_space_suffix}.v2{size_suffix} {{%0, %1}}, [%2];" - : "={constraint}"(real_result), "={constraint}"(imag_result) - : "{ptr_constraint}"({ptr_name}) + asm volatile("ld.volatile{s.ptx_space_suffix}.v2{t.ptx_type_suffix} {{%0, %1}}, [%2];" + : "={t.val_constraint}"(real_result), "={t.val_constraint}"(imag_result) + : "{s.ptr_constraint}"({s.ptr_expr}) : "memory"); #else - asm volatile("ld.relaxed{scope_suffix}{memory_space_suffix}.v2{size_suffix} {{%0, %1}}, [%2];" - : "={constraint}"(real_result), "={constraint}"(imag_result) - : "{ptr_constraint}"({ptr_name}) + asm volatile("ld.relaxed{s.ptx_scope_suffix}{s.ptx_space_suffix}.v2{t.ptx_type_suffix} {{%0, %1}}, [%2];" + : "={t.val_constraint}"(real_result), "={t.val_constraint}"(imag_result) + : "{s.ptr_constraint}"({s.ptr_expr}) : "memory"); #endif - return thrust::complex<{typename}>{{real_result, imag_result}}; + return thrust::complex<{t.name}>{{real_result, imag_result}}; }} -__device__ __forceinline__ void store_relaxed{function_memory_space_suffix}(thrust::complex<{typename}>* ptr, thrust::complex<{typename}> result) +__device__ __forceinline__ void store_relaxed{s.fn_suffix}(thrust::complex<{t.name}>* ptr, thrust::complex<{t.name}> result) {{ auto real_result = result.real(); auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile{memory_space_suffix}.v2{size_suffix} [%0], {{%1, %2}};" - :: "{ptr_constraint}"({ptr_name}), "{constraint}"(real_result), "{constraint}"(imag_result) + asm volatile("st.volatile{s.ptx_space_suffix}.v2{t.ptx_type_suffix} [%0], {{%1, %2}};" + :: "{s.ptr_constraint}"({s.ptr_expr}), "{t.val_constraint}"(real_result), "{t.val_constraint}"(imag_result) : "memory"); #else - asm volatile("st.relaxed{scope_suffix}{memory_space_suffix}.v2{size_suffix} [%0], {{%1, %2}};" - :: "{ptr_constraint}"({ptr_name}), "{constraint}"(real_result), "{constraint}"(imag_result) + asm volatile("st.relaxed{s.ptx_scope_suffix}{s.ptx_space_suffix}.v2{t.ptx_type_suffix} [%0], {{%1, %2}};" + :: "{s.ptr_constraint}"({s.ptr_expr}), "{t.val_constraint}"(real_result), "{t.val_constraint}"(imag_result) : "memory"); #endif }} @@ -189,4 +232,4 @@ } // namespace gko #endif // GKO_CUDA_COMPONENTS_MEMORY_CUH_ -""") \ No newline at end of file +""") diff --git a/hip/components/memory.hip.hpp b/hip/components/memory.hip.hpp index b424c8bbc06..485f67343e0 100644 --- a/hip/components/memory.hip.hpp +++ b/hip/components/memory.hip.hpp @@ -52,14 +52,14 @@ namespace hip { template -__device__ __forceinline__ ValueType load_relaxed(ValueType* ptr) +__device__ __forceinline__ ValueType load_relaxed(const ValueType* ptr) { return load(ptr, 0); } template -__device__ __forceinline__ ValueType load_acquire(ValueType* ptr) +__device__ __forceinline__ ValueType load_acquire(const ValueType* ptr) { auto result = load(ptr, 0); __threadfence(); @@ -82,14 +82,14 @@ __device__ __forceinline__ void store_release(ValueType* ptr, ValueType value) template -__device__ __forceinline__ ValueType load_relaxed_shared(ValueType* ptr) +__device__ __forceinline__ ValueType load_relaxed_shared(const ValueType* ptr) { return load(ptr, 0); } template -__device__ __forceinline__ ValueType load_acquire_shared(ValueType* ptr) +__device__ __forceinline__ ValueType load_acquire_shared(const ValueType* ptr) { auto result = load(ptr, 0); __threadfence(); From 77b80ed243449dc8bd497076cb84aea6c07eabb2 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 22 Sep 2023 22:40:40 +0200 Subject: [PATCH 321/583] restore peek functionality 
--- common/cuda_hip/components/syncfree.hpp.inc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/common/cuda_hip/components/syncfree.hpp.inc b/common/cuda_hip/components/syncfree.hpp.inc index 113c66d91ec..a8fa767e4dd 100644 --- a/common/cuda_hip/components/syncfree.hpp.inc +++ b/common/cuda_hip/components/syncfree.hpp.inc @@ -107,7 +107,19 @@ public: group::tiled_partition(group::this_thread_block()).sync(); } - __device__ __forceinline__ bool peek(IndexType dependency) { return false; } + __device__ __forceinline__ bool peek(IndexType dependency) + { + const auto dep_block = dependency / (block_size / subwarp_size); + const auto dep_local = dependency % (block_size / subwarp_size); + // assert(dependency < work_id); + if (dep_block == block_id) { + // peek at a local dependency + return load_acquire_shared(local.status + dep_local); + } else { + // peek at a global dependency + return load_acquire(global.status + dependency); + } + } __device__ __forceinline__ void mark_ready() { From f0257851c1f58fc2afc91450712c8ceded28947c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 25 Sep 2023 14:44:39 +0200 Subject: [PATCH 322/583] use const_cast for CUDA atomic load/store wrappers --- cuda/components/memory.cuh | 216 ++++++++++-------- dev_tools/scripts/generate_cuda_memory_ptx.py | 22 +- 2 files changed, 136 insertions(+), 102 deletions(-) diff --git a/cuda/components/memory.cuh b/cuda/components/memory.cuh index 15f2541bddf..844fca6adf4 100644 --- a/cuda/components/memory.cuh +++ b/cuda/components/memory.cuh @@ -107,12 +107,12 @@ __device__ __forceinline__ int32 load_relaxed_shared(const int32* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.b32 %0, [%1];" : "=r"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #else asm volatile("ld.relaxed.cta.shared.b32 %0, [%1];" : "=r"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #endif @@ -124,12 +124,12 @@ __device__ __forceinline__ void store_relaxed_shared(int32* ptr, int32 result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "r"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "r"(result) : "memory"); #endif @@ -142,12 +142,12 @@ __device__ __forceinline__ int64 load_relaxed_shared(const int64* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.b64 %0, [%1];" : "=l"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #else asm volatile("ld.relaxed.cta.shared.b64 %0, [%1];" : "=l"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #endif @@ -159,12 +159,12 @@ __device__ __forceinline__ void store_relaxed_shared(int64* ptr, int64 result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "l"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + 
convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "l"(result) : "memory"); #endif @@ -177,12 +177,12 @@ __device__ __forceinline__ float load_relaxed_shared(const float* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.f32 %0, [%1];" : "=f"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #else asm volatile("ld.relaxed.cta.shared.f32 %0, [%1];" : "=f"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #endif @@ -194,12 +194,12 @@ __device__ __forceinline__ void store_relaxed_shared(float* ptr, float result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "f"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "f"(result) : "memory"); #endif @@ -210,15 +210,17 @@ __device__ __forceinline__ double load_relaxed_shared(const double* ptr) { double result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile.shared.f64 %0, [%1];" - : "=d"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) - : "memory"); + asm volatile( + "ld.volatile.shared.f64 %0, [%1];" + : "=d"(result) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) + : "memory"); #else - asm volatile("ld.relaxed.cta.shared.f64 %0, [%1];" - : "=d"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) - : "memory"); + asm volatile( + "ld.relaxed.cta.shared.f64 %0, [%1];" + : "=d"(result) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) + : "memory"); #endif return result; @@ -229,12 +231,12 @@ __device__ __forceinline__ void store_relaxed_shared(double* ptr, double result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "d"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "d"(result) : "memory"); #endif @@ -247,12 +249,12 @@ __device__ __forceinline__ int32 load_acquire_shared(const int32* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.b32 %0, [%1];" : "=r"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #else asm volatile("ld.acquire.cta.shared.b32 %0, [%1];" : "=r"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #endif membar_acq_rel_shared(); @@ -265,12 +267,12 @@ __device__ __forceinline__ void store_release_shared(int32* ptr, int32 result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "r"(result) : "memory"); #else asm volatile("st.release.cta.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "r"(result) : "memory"); #endif @@ -283,12 +285,12 @@ __device__ __forceinline__ int64 load_acquire_shared(const int64* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.b64 %0, [%1];" : 
"=l"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #else asm volatile("ld.acquire.cta.shared.b64 %0, [%1];" : "=l"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #endif membar_acq_rel_shared(); @@ -301,12 +303,12 @@ __device__ __forceinline__ void store_release_shared(int64* ptr, int64 result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "l"(result) : "memory"); #else asm volatile("st.release.cta.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "l"(result) : "memory"); #endif @@ -319,12 +321,12 @@ __device__ __forceinline__ float load_acquire_shared(const float* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.f32 %0, [%1];" : "=f"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #else asm volatile("ld.acquire.cta.shared.f32 %0, [%1];" : "=f"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #endif membar_acq_rel_shared(); @@ -337,12 +339,12 @@ __device__ __forceinline__ void store_release_shared(float* ptr, float result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "f"(result) : "memory"); #else asm volatile("st.release.cta.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "f"(result) : "memory"); #endif @@ -353,15 +355,17 @@ __device__ __forceinline__ double load_acquire_shared(const double* ptr) { double result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile.shared.f64 %0, [%1];" - : "=d"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) - : "memory"); + asm volatile( + "ld.volatile.shared.f64 %0, [%1];" + : "=d"(result) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) + : "memory"); #else - asm volatile("ld.acquire.cta.shared.f64 %0, [%1];" - : "=d"(result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) - : "memory"); + asm volatile( + "ld.acquire.cta.shared.f64 %0, [%1];" + : "=d"(result) + : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) + : "memory"); #endif membar_acq_rel_shared(); return result; @@ -373,12 +377,12 @@ __device__ __forceinline__ void store_release_shared(double* ptr, double result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "d"(result) : "memory"); #else asm volatile("st.release.cta.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr(const_cast(ptr))), "d"(result) : "memory"); #endif @@ -391,12 +395,12 @@ __device__ __forceinline__ int32 load_relaxed(const int32* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.b32 %0, [%1];" : "=r"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #else asm volatile("ld.relaxed.gpu.b32 %0, [%1];" : "=r"(result) - : "l"((void*)ptr) + : 
"l"(const_cast(ptr)) : "memory"); #endif @@ -407,10 +411,12 @@ __device__ __forceinline__ int32 load_relaxed(const int32* ptr) __device__ __forceinline__ void store_relaxed(int32* ptr, int32 result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b32 [%0], %1;" ::"l"((void*)ptr), "r"(result) + asm volatile("st.volatile.b32 [%0], %1;" ::"l"(const_cast(ptr)), + "r"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.b32 [%0], %1;" ::"l"((void*)ptr), "r"(result) + asm volatile("st.relaxed.gpu.b32 [%0], %1;" ::"l"(const_cast(ptr)), + "r"(result) : "memory"); #endif } @@ -422,12 +428,12 @@ __device__ __forceinline__ int64 load_relaxed(const int64* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.b64 %0, [%1];" : "=l"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #else asm volatile("ld.relaxed.gpu.b64 %0, [%1];" : "=l"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #endif @@ -438,10 +444,12 @@ __device__ __forceinline__ int64 load_relaxed(const int64* ptr) __device__ __forceinline__ void store_relaxed(int64* ptr, int64 result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b64 [%0], %1;" ::"l"((void*)ptr), "l"(result) + asm volatile("st.volatile.b64 [%0], %1;" ::"l"(const_cast(ptr)), + "l"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.b64 [%0], %1;" ::"l"((void*)ptr), "l"(result) + asm volatile("st.relaxed.gpu.b64 [%0], %1;" ::"l"(const_cast(ptr)), + "l"(result) : "memory"); #endif } @@ -453,12 +461,12 @@ __device__ __forceinline__ float load_relaxed(const float* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.f32 %0, [%1];" : "=f"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #else asm volatile("ld.relaxed.gpu.f32 %0, [%1];" : "=f"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #endif @@ -469,10 +477,12 @@ __device__ __forceinline__ float load_relaxed(const float* ptr) __device__ __forceinline__ void store_relaxed(float* ptr, float result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f32 [%0], %1;" ::"l"((void*)ptr), "f"(result) + asm volatile("st.volatile.f32 [%0], %1;" ::"l"(const_cast(ptr)), + "f"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.f32 [%0], %1;" ::"l"((void*)ptr), "f"(result) + asm volatile("st.relaxed.gpu.f32 [%0], %1;" ::"l"(const_cast(ptr)), + "f"(result) : "memory"); #endif } @@ -484,12 +494,12 @@ __device__ __forceinline__ double load_relaxed(const double* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.f64 %0, [%1];" : "=d"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #else asm volatile("ld.relaxed.gpu.f64 %0, [%1];" : "=d"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #endif @@ -500,10 +510,12 @@ __device__ __forceinline__ double load_relaxed(const double* ptr) __device__ __forceinline__ void store_relaxed(double* ptr, double result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f64 [%0], %1;" ::"l"((void*)ptr), "d"(result) + asm volatile("st.volatile.f64 [%0], %1;" ::"l"(const_cast(ptr)), + "d"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.f64 [%0], %1;" ::"l"((void*)ptr), "d"(result) + asm volatile("st.relaxed.gpu.f64 [%0], %1;" ::"l"(const_cast(ptr)), + "d"(result) : "memory"); #endif } @@ -515,12 +527,12 @@ __device__ __forceinline__ int32 load_acquire(const int32* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.b32 %0, [%1];" : "=r"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #else asm volatile("ld.acquire.gpu.b32 %0, [%1];" : 
"=r"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #endif membar_acq_rel(); @@ -532,10 +544,12 @@ __device__ __forceinline__ void store_release(int32* ptr, int32 result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b32 [%0], %1;" ::"l"((void*)ptr), "r"(result) + asm volatile("st.volatile.b32 [%0], %1;" ::"l"(const_cast(ptr)), + "r"(result) : "memory"); #else - asm volatile("st.release.gpu.b32 [%0], %1;" ::"l"((void*)ptr), "r"(result) + asm volatile("st.release.gpu.b32 [%0], %1;" ::"l"(const_cast(ptr)), + "r"(result) : "memory"); #endif } @@ -547,12 +561,12 @@ __device__ __forceinline__ int64 load_acquire(const int64* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.b64 %0, [%1];" : "=l"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #else asm volatile("ld.acquire.gpu.b64 %0, [%1];" : "=l"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #endif membar_acq_rel(); @@ -564,10 +578,12 @@ __device__ __forceinline__ void store_release(int64* ptr, int64 result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b64 [%0], %1;" ::"l"((void*)ptr), "l"(result) + asm volatile("st.volatile.b64 [%0], %1;" ::"l"(const_cast(ptr)), + "l"(result) : "memory"); #else - asm volatile("st.release.gpu.b64 [%0], %1;" ::"l"((void*)ptr), "l"(result) + asm volatile("st.release.gpu.b64 [%0], %1;" ::"l"(const_cast(ptr)), + "l"(result) : "memory"); #endif } @@ -579,12 +595,12 @@ __device__ __forceinline__ float load_acquire(const float* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.f32 %0, [%1];" : "=f"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #else asm volatile("ld.acquire.gpu.f32 %0, [%1];" : "=f"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #endif membar_acq_rel(); @@ -596,10 +612,12 @@ __device__ __forceinline__ void store_release(float* ptr, float result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f32 [%0], %1;" ::"l"((void*)ptr), "f"(result) + asm volatile("st.volatile.f32 [%0], %1;" ::"l"(const_cast(ptr)), + "f"(result) : "memory"); #else - asm volatile("st.release.gpu.f32 [%0], %1;" ::"l"((void*)ptr), "f"(result) + asm volatile("st.release.gpu.f32 [%0], %1;" ::"l"(const_cast(ptr)), + "f"(result) : "memory"); #endif } @@ -611,12 +629,12 @@ __device__ __forceinline__ double load_acquire(const double* ptr) #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.f64 %0, [%1];" : "=d"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #else asm volatile("ld.acquire.gpu.f64 %0, [%1];" : "=d"(result) - : "l"((void*)ptr) + : "l"(const_cast(ptr)) : "memory"); #endif membar_acq_rel(); @@ -628,10 +646,12 @@ __device__ __forceinline__ void store_release(double* ptr, double result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f64 [%0], %1;" ::"l"((void*)ptr), "d"(result) + asm volatile("st.volatile.f64 [%0], %1;" ::"l"(const_cast(ptr)), + "d"(result) : "memory"); #else - asm volatile("st.release.gpu.f64 [%0], %1;" ::"l"((void*)ptr), "d"(result) + asm volatile("st.release.gpu.f64 [%0], %1;" ::"l"(const_cast(ptr)), + "d"(result) : "memory"); #endif } @@ -645,12 +665,14 @@ __device__ __forceinline__ thrust::complex load_relaxed_shared( #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.v2.f32 {%0, %1}, [%2];" : "=f"(real_result), "=f"(imag_result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr( + const_cast*>(ptr))) : "memory"); #else asm 
volatile("ld.relaxed.cta.shared.v2.f32 {%0, %1}, [%2];" : "=f"(real_result), "=f"(imag_result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr( + const_cast*>(ptr))) : "memory"); #endif return thrust::complex{real_result, imag_result}; @@ -664,12 +686,14 @@ __device__ __forceinline__ void store_relaxed_shared( auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.v2.f32 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr( + const_cast*>(ptr))), "f"(real_result), "f"(imag_result) : "memory"); #else asm volatile("st.relaxed.cta.shared.v2.f32 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr( + const_cast*>(ptr))), "f"(real_result), "f"(imag_result) : "memory"); #endif @@ -684,12 +708,14 @@ __device__ __forceinline__ thrust::complex load_relaxed_shared( #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.shared.v2.f64 {%0, %1}, [%2];" : "=d"(real_result), "=d"(imag_result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr( + const_cast*>(ptr))) : "memory"); #else asm volatile("ld.relaxed.cta.shared.v2.f64 {%0, %1}, [%2];" : "=d"(real_result), "=d"(imag_result) - : "r"(convert_generic_ptr_to_smem_ptr((void*)ptr)) + : "r"(convert_generic_ptr_to_smem_ptr( + const_cast*>(ptr))) : "memory"); #endif return thrust::complex{real_result, imag_result}; @@ -703,12 +729,14 @@ __device__ __forceinline__ void store_relaxed_shared( auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.v2.f64 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr( + const_cast*>(ptr))), "d"(real_result), "d"(imag_result) : "memory"); #else asm volatile("st.relaxed.cta.shared.v2.f64 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr((void*)ptr)), + convert_generic_ptr_to_smem_ptr( + const_cast*>(ptr))), "d"(real_result), "d"(imag_result) : "memory"); #endif @@ -723,12 +751,12 @@ __device__ __forceinline__ thrust::complex load_relaxed( #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.v2.f32 {%0, %1}, [%2];" : "=f"(real_result), "=f"(imag_result) - : "l"((void*)ptr) + : "l"(const_cast*>(ptr)) : "memory"); #else asm volatile("ld.relaxed.gpu.v2.f32 {%0, %1}, [%2];" : "=f"(real_result), "=f"(imag_result) - : "l"((void*)ptr) + : "l"(const_cast*>(ptr)) : "memory"); #endif return thrust::complex{real_result, imag_result}; @@ -741,11 +769,13 @@ __device__ __forceinline__ void store_relaxed(thrust::complex* ptr, auto real_result = result.real(); auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.v2.f32 [%0], {%1, %2};" ::"l"((void*)ptr), + asm volatile("st.volatile.v2.f32 [%0], {%1, %2};" ::"l"( + const_cast*>(ptr)), "f"(real_result), "f"(imag_result) : "memory"); #else - asm volatile("st.relaxed.gpu.v2.f32 [%0], {%1, %2};" ::"l"((void*)ptr), + asm volatile("st.relaxed.gpu.v2.f32 [%0], {%1, %2};" ::"l"( + const_cast*>(ptr)), "f"(real_result), "f"(imag_result) : "memory"); #endif @@ -760,12 +790,12 @@ __device__ __forceinline__ thrust::complex load_relaxed( #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile.v2.f64 {%0, %1}, [%2];" : "=d"(real_result), "=d"(imag_result) - : "l"((void*)ptr) + : "l"(const_cast*>(ptr)) : "memory"); #else asm volatile("ld.relaxed.gpu.v2.f64 {%0, %1}, [%2];" : "=d"(real_result), "=d"(imag_result) - : "l"((void*)ptr) + : "l"(const_cast*>(ptr)) : 
"memory"); #endif return thrust::complex{real_result, imag_result}; @@ -778,11 +808,13 @@ __device__ __forceinline__ void store_relaxed(thrust::complex* ptr, auto real_result = result.real(); auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.v2.f64 [%0], {%1, %2};" ::"l"((void*)ptr), + asm volatile("st.volatile.v2.f64 [%0], {%1, %2};" ::"l"( + const_cast*>(ptr)), "d"(real_result), "d"(imag_result) : "memory"); #else - asm volatile("st.relaxed.gpu.v2.f64 [%0], {%1, %2};" ::"l"((void*)ptr), + asm volatile("st.relaxed.gpu.v2.f64 [%0], {%1, %2};" ::"l"( + const_cast*>(ptr)), "d"(real_result), "d"(imag_result) : "memory"); #endif diff --git a/dev_tools/scripts/generate_cuda_memory_ptx.py b/dev_tools/scripts/generate_cuda_memory_ptx.py index dd5d682a9b8..dae5f6c3a59 100755 --- a/dev_tools/scripts/generate_cuda_memory_ptx.py +++ b/dev_tools/scripts/generate_cuda_memory_ptx.py @@ -29,8 +29,8 @@ class type_desc: memory_spaces = [ space(ptx_space_suffix=".shared", ptx_scope_suffix=".cta", fn_suffix="_shared", - ptr_expr="convert_generic_ptr_to_smem_ptr((void*)ptr)", ptr_constraint="r"), - space(ptx_space_suffix="", ptx_scope_suffix=".gpu", fn_suffix="", ptr_expr="(void*)ptr", ptr_constraint="l")] + ptr_expr="convert_generic_ptr_to_smem_ptr(const_cast<{typename}*>(ptr))", ptr_constraint="r"), + space(ptx_space_suffix="", ptx_scope_suffix=".gpu", fn_suffix="", ptr_expr="const_cast<{typename}*>(ptr)", ptr_constraint="l")] memory_orderings = [ ordering(ptx_load_suffix=".relaxed", fn_load_suffix="_relaxed", ptx_store_suffix=".relaxed", fn_store_suffix="_relaxed", is_relaxed=True), @@ -150,6 +150,7 @@ class type_desc: for o in memory_orderings: for t in types: membar_expression = "" if o.is_relaxed else f"membar_acq_rel{s.fn_suffix}();" + ptr_expr = s.ptr_expr.format(typename=t.name) print(f""" __device__ __forceinline__ {t.name} load{o.fn_load_suffix}{s.fn_suffix}(const {t.name}* ptr) {{ @@ -157,12 +158,12 @@ class type_desc: #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile{s.ptx_space_suffix}{t.ptx_type_suffix} %0, [%1];" : "={t.val_constraint}"(result) - : "{s.ptr_constraint}"({s.ptr_expr}) + : "{s.ptr_constraint}"({ptr_expr}) : "memory"); #else asm volatile("ld{o.ptx_load_suffix}{s.ptx_scope_suffix}{s.ptx_space_suffix}{t.ptx_type_suffix} %0, [%1];" : "={t.val_constraint}"(result) - : "{s.ptr_constraint}"({s.ptr_expr}) + : "{s.ptr_constraint}"({ptr_expr}) : "memory"); #endif {membar_expression} @@ -175,11 +176,11 @@ class type_desc: {membar_expression} #if __CUDA_ARCH__ < 700 asm volatile("st.volatile{s.ptx_space_suffix}{t.ptx_type_suffix} [%0], %1;" - :: "{s.ptr_constraint}"({s.ptr_expr}), "{t.val_constraint}"(result) + :: "{s.ptr_constraint}"({ptr_expr}), "{t.val_constraint}"(result) : "memory"); #else asm volatile("st{o.ptx_store_suffix}{s.ptx_scope_suffix}{s.ptx_space_suffix}{t.ptx_type_suffix} [%0], %1;" - :: "{s.ptr_constraint}"({s.ptr_expr}), "{t.val_constraint}"(result) + :: "{s.ptr_constraint}"({ptr_expr}), "{t.val_constraint}"(result) : "memory"); #endif }} @@ -190,6 +191,7 @@ class type_desc: type_desc(ptx_type_suffix=".f64", val_constraint="d", name="double")] for s in memory_spaces: for t in types: + ptr_expr = s.ptr_expr.format(typename=f"thrust::complex<{t.name}>") print(f""" __device__ __forceinline__ thrust::complex<{t.name}> load_relaxed{s.fn_suffix}(const thrust::complex<{t.name}>* ptr) {{ @@ -198,12 +200,12 @@ class type_desc: #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile{s.ptx_space_suffix}.v2{t.ptx_type_suffix} {{%0, %1}}, [%2];" : 
"={t.val_constraint}"(real_result), "={t.val_constraint}"(imag_result) - : "{s.ptr_constraint}"({s.ptr_expr}) + : "{s.ptr_constraint}"({ptr_expr}) : "memory"); #else asm volatile("ld.relaxed{s.ptx_scope_suffix}{s.ptx_space_suffix}.v2{t.ptx_type_suffix} {{%0, %1}}, [%2];" : "={t.val_constraint}"(real_result), "={t.val_constraint}"(imag_result) - : "{s.ptr_constraint}"({s.ptr_expr}) + : "{s.ptr_constraint}"({ptr_expr}) : "memory"); #endif return thrust::complex<{t.name}>{{real_result, imag_result}}; @@ -216,11 +218,11 @@ class type_desc: auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile{s.ptx_space_suffix}.v2{t.ptx_type_suffix} [%0], {{%1, %2}};" - :: "{s.ptr_constraint}"({s.ptr_expr}), "{t.val_constraint}"(real_result), "{t.val_constraint}"(imag_result) + :: "{s.ptr_constraint}"({ptr_expr}), "{t.val_constraint}"(real_result), "{t.val_constraint}"(imag_result) : "memory"); #else asm volatile("st.relaxed{s.ptx_scope_suffix}{s.ptx_space_suffix}.v2{t.ptx_type_suffix} [%0], {{%1, %2}};" - :: "{s.ptr_constraint}"({s.ptr_expr}), "{t.val_constraint}"(real_result), "{t.val_constraint}"(imag_result) + :: "{s.ptr_constraint}"({ptr_expr}), "{t.val_constraint}"(real_result), "{t.val_constraint}"(imag_result) : "memory"); #endif }} From fc4a4e6fe90a47be37e8293b8ffb6380dd2867b3 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 25 Sep 2023 15:59:26 +0200 Subject: [PATCH 323/583] remove unnecessary const casts --- cuda/components/memory.cuh | 104 +++++++----------- dev_tools/scripts/generate_cuda_memory_ptx.py | 28 +++-- 2 files changed, 56 insertions(+), 76 deletions(-) diff --git a/cuda/components/memory.cuh b/cuda/components/memory.cuh index 844fca6adf4..af3a0e838ea 100644 --- a/cuda/components/memory.cuh +++ b/cuda/components/memory.cuh @@ -124,12 +124,12 @@ __device__ __forceinline__ void store_relaxed_shared(int32* ptr, int32 result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "r"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "r"(result) : "memory"); #endif @@ -159,12 +159,12 @@ __device__ __forceinline__ void store_relaxed_shared(int64* ptr, int64 result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "l"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "l"(result) : "memory"); #endif @@ -194,12 +194,12 @@ __device__ __forceinline__ void store_relaxed_shared(float* ptr, float result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "f"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "f"(result) : "memory"); #endif @@ -231,12 +231,12 @@ __device__ __forceinline__ void store_relaxed_shared(double* ptr, double result) { #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), 
"d"(result) : "memory"); #else asm volatile("st.relaxed.cta.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "d"(result) : "memory"); #endif @@ -267,12 +267,12 @@ __device__ __forceinline__ void store_release_shared(int32* ptr, int32 result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "r"(result) : "memory"); #else asm volatile("st.release.cta.shared.b32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "r"(result) : "memory"); #endif @@ -303,12 +303,12 @@ __device__ __forceinline__ void store_release_shared(int64* ptr, int64 result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "l"(result) : "memory"); #else asm volatile("st.release.cta.shared.b64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "l"(result) : "memory"); #endif @@ -339,12 +339,12 @@ __device__ __forceinline__ void store_release_shared(float* ptr, float result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "f"(result) : "memory"); #else asm volatile("st.release.cta.shared.f32 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "f"(result) : "memory"); #endif @@ -377,12 +377,12 @@ __device__ __forceinline__ void store_release_shared(double* ptr, double result) membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "d"(result) : "memory"); #else asm volatile("st.release.cta.shared.f64 [%0], %1;" ::"r"( - convert_generic_ptr_to_smem_ptr(const_cast(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "d"(result) : "memory"); #endif @@ -411,12 +411,10 @@ __device__ __forceinline__ int32 load_relaxed(const int32* ptr) __device__ __forceinline__ void store_relaxed(int32* ptr, int32 result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b32 [%0], %1;" ::"l"(const_cast(ptr)), - "r"(result) + asm volatile("st.volatile.b32 [%0], %1;" ::"l"(ptr), "r"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.b32 [%0], %1;" ::"l"(const_cast(ptr)), - "r"(result) + asm volatile("st.relaxed.gpu.b32 [%0], %1;" ::"l"(ptr), "r"(result) : "memory"); #endif } @@ -444,12 +442,10 @@ __device__ __forceinline__ int64 load_relaxed(const int64* ptr) __device__ __forceinline__ void store_relaxed(int64* ptr, int64 result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b64 [%0], %1;" ::"l"(const_cast(ptr)), - "l"(result) + asm volatile("st.volatile.b64 [%0], %1;" ::"l"(ptr), "l"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.b64 [%0], %1;" ::"l"(const_cast(ptr)), - "l"(result) + asm volatile("st.relaxed.gpu.b64 [%0], %1;" ::"l"(ptr), "l"(result) : "memory"); #endif } @@ -477,12 +473,10 @@ __device__ __forceinline__ float load_relaxed(const float* ptr) __device__ __forceinline__ void store_relaxed(float* ptr, float result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f32 [%0], %1;" 
::"l"(const_cast(ptr)), - "f"(result) + asm volatile("st.volatile.f32 [%0], %1;" ::"l"(ptr), "f"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.f32 [%0], %1;" ::"l"(const_cast(ptr)), - "f"(result) + asm volatile("st.relaxed.gpu.f32 [%0], %1;" ::"l"(ptr), "f"(result) : "memory"); #endif } @@ -510,12 +504,10 @@ __device__ __forceinline__ double load_relaxed(const double* ptr) __device__ __forceinline__ void store_relaxed(double* ptr, double result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f64 [%0], %1;" ::"l"(const_cast(ptr)), - "d"(result) + asm volatile("st.volatile.f64 [%0], %1;" ::"l"(ptr), "d"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.f64 [%0], %1;" ::"l"(const_cast(ptr)), - "d"(result) + asm volatile("st.relaxed.gpu.f64 [%0], %1;" ::"l"(ptr), "d"(result) : "memory"); #endif } @@ -544,12 +536,10 @@ __device__ __forceinline__ void store_release(int32* ptr, int32 result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b32 [%0], %1;" ::"l"(const_cast(ptr)), - "r"(result) + asm volatile("st.volatile.b32 [%0], %1;" ::"l"(ptr), "r"(result) : "memory"); #else - asm volatile("st.release.gpu.b32 [%0], %1;" ::"l"(const_cast(ptr)), - "r"(result) + asm volatile("st.release.gpu.b32 [%0], %1;" ::"l"(ptr), "r"(result) : "memory"); #endif } @@ -578,12 +568,10 @@ __device__ __forceinline__ void store_release(int64* ptr, int64 result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b64 [%0], %1;" ::"l"(const_cast(ptr)), - "l"(result) + asm volatile("st.volatile.b64 [%0], %1;" ::"l"(ptr), "l"(result) : "memory"); #else - asm volatile("st.release.gpu.b64 [%0], %1;" ::"l"(const_cast(ptr)), - "l"(result) + asm volatile("st.release.gpu.b64 [%0], %1;" ::"l"(ptr), "l"(result) : "memory"); #endif } @@ -612,12 +600,10 @@ __device__ __forceinline__ void store_release(float* ptr, float result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f32 [%0], %1;" ::"l"(const_cast(ptr)), - "f"(result) + asm volatile("st.volatile.f32 [%0], %1;" ::"l"(ptr), "f"(result) : "memory"); #else - asm volatile("st.release.gpu.f32 [%0], %1;" ::"l"(const_cast(ptr)), - "f"(result) + asm volatile("st.release.gpu.f32 [%0], %1;" ::"l"(ptr), "f"(result) : "memory"); #endif } @@ -646,12 +632,10 @@ __device__ __forceinline__ void store_release(double* ptr, double result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.f64 [%0], %1;" ::"l"(const_cast(ptr)), - "d"(result) + asm volatile("st.volatile.f64 [%0], %1;" ::"l"(ptr), "d"(result) : "memory"); #else - asm volatile("st.release.gpu.f64 [%0], %1;" ::"l"(const_cast(ptr)), - "d"(result) + asm volatile("st.release.gpu.f64 [%0], %1;" ::"l"(ptr), "d"(result) : "memory"); #endif } @@ -686,14 +670,12 @@ __device__ __forceinline__ void store_relaxed_shared( auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.v2.f32 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr( - const_cast*>(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "f"(real_result), "f"(imag_result) : "memory"); #else asm volatile("st.relaxed.cta.shared.v2.f32 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr( - const_cast*>(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "f"(real_result), "f"(imag_result) : "memory"); #endif @@ -729,14 +711,12 @@ __device__ __forceinline__ void store_relaxed_shared( auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile.shared.v2.f64 [%0], {%1, %2};" ::"r"( - 
convert_generic_ptr_to_smem_ptr( - const_cast*>(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "d"(real_result), "d"(imag_result) : "memory"); #else asm volatile("st.relaxed.cta.shared.v2.f64 [%0], {%1, %2};" ::"r"( - convert_generic_ptr_to_smem_ptr( - const_cast*>(ptr))), + convert_generic_ptr_to_smem_ptr(ptr)), "d"(real_result), "d"(imag_result) : "memory"); #endif @@ -769,13 +749,11 @@ __device__ __forceinline__ void store_relaxed(thrust::complex* ptr, auto real_result = result.real(); auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.v2.f32 [%0], {%1, %2};" ::"l"( - const_cast*>(ptr)), + asm volatile("st.volatile.v2.f32 [%0], {%1, %2};" ::"l"(ptr), "f"(real_result), "f"(imag_result) : "memory"); #else - asm volatile("st.relaxed.gpu.v2.f32 [%0], {%1, %2};" ::"l"( - const_cast*>(ptr)), + asm volatile("st.relaxed.gpu.v2.f32 [%0], {%1, %2};" ::"l"(ptr), "f"(real_result), "f"(imag_result) : "memory"); #endif @@ -808,13 +786,11 @@ __device__ __forceinline__ void store_relaxed(thrust::complex* ptr, auto real_result = result.real(); auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.v2.f64 [%0], {%1, %2};" ::"l"( - const_cast*>(ptr)), + asm volatile("st.volatile.v2.f64 [%0], {%1, %2};" ::"l"(ptr), "d"(real_result), "d"(imag_result) : "memory"); #else - asm volatile("st.relaxed.gpu.v2.f64 [%0], {%1, %2};" ::"l"( - const_cast*>(ptr)), + asm volatile("st.relaxed.gpu.v2.f64 [%0], {%1, %2};" ::"l"(ptr), "d"(real_result), "d"(imag_result) : "memory"); #endif diff --git a/dev_tools/scripts/generate_cuda_memory_ptx.py b/dev_tools/scripts/generate_cuda_memory_ptx.py index dae5f6c3a59..4cbe05361c1 100755 --- a/dev_tools/scripts/generate_cuda_memory_ptx.py +++ b/dev_tools/scripts/generate_cuda_memory_ptx.py @@ -29,8 +29,8 @@ class type_desc: memory_spaces = [ space(ptx_space_suffix=".shared", ptx_scope_suffix=".cta", fn_suffix="_shared", - ptr_expr="convert_generic_ptr_to_smem_ptr(const_cast<{typename}*>(ptr))", ptr_constraint="r"), - space(ptx_space_suffix="", ptx_scope_suffix=".gpu", fn_suffix="", ptr_expr="const_cast<{typename}*>(ptr)", ptr_constraint="l")] + ptr_expr="convert_generic_ptr_to_smem_ptr({ptr})", ptr_constraint="r"), + space(ptx_space_suffix="", ptx_scope_suffix=".gpu", fn_suffix="", ptr_expr="{ptr}", ptr_constraint="l")] memory_orderings = [ ordering(ptx_load_suffix=".relaxed", fn_load_suffix="_relaxed", ptx_store_suffix=".relaxed", fn_store_suffix="_relaxed", is_relaxed=True), @@ -150,7 +150,9 @@ class type_desc: for o in memory_orderings: for t in types: membar_expression = "" if o.is_relaxed else f"membar_acq_rel{s.fn_suffix}();" - ptr_expr = s.ptr_expr.format(typename=t.name) + const_ptr_expr = s.ptr_expr.format( + ptr=f"const_cast<{t.name}*>(ptr)") + mut_ptr_expr = s.ptr_expr.format(ptr="ptr") print(f""" __device__ __forceinline__ {t.name} load{o.fn_load_suffix}{s.fn_suffix}(const {t.name}* ptr) {{ @@ -158,12 +160,12 @@ class type_desc: #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile{s.ptx_space_suffix}{t.ptx_type_suffix} %0, [%1];" : "={t.val_constraint}"(result) - : "{s.ptr_constraint}"({ptr_expr}) + : "{s.ptr_constraint}"({const_ptr_expr}) : "memory"); #else asm volatile("ld{o.ptx_load_suffix}{s.ptx_scope_suffix}{s.ptx_space_suffix}{t.ptx_type_suffix} %0, [%1];" : "={t.val_constraint}"(result) - : "{s.ptr_constraint}"({ptr_expr}) + : "{s.ptr_constraint}"({const_ptr_expr}) : "memory"); #endif {membar_expression} @@ -176,11 +178,11 @@ class type_desc: {membar_expression} #if __CUDA_ARCH__ < 700 asm 
volatile("st.volatile{s.ptx_space_suffix}{t.ptx_type_suffix} [%0], %1;" - :: "{s.ptr_constraint}"({ptr_expr}), "{t.val_constraint}"(result) + :: "{s.ptr_constraint}"({mut_ptr_expr}), "{t.val_constraint}"(result) : "memory"); #else asm volatile("st{o.ptx_store_suffix}{s.ptx_scope_suffix}{s.ptx_space_suffix}{t.ptx_type_suffix} [%0], %1;" - :: "{s.ptr_constraint}"({ptr_expr}), "{t.val_constraint}"(result) + :: "{s.ptr_constraint}"({mut_ptr_expr}), "{t.val_constraint}"(result) : "memory"); #endif }} @@ -191,7 +193,9 @@ class type_desc: type_desc(ptx_type_suffix=".f64", val_constraint="d", name="double")] for s in memory_spaces: for t in types: - ptr_expr = s.ptr_expr.format(typename=f"thrust::complex<{t.name}>") + const_ptr_expr = s.ptr_expr.format( + ptr=f"const_cast*>(ptr)") + mut_ptr_expr = s.ptr_expr.format(ptr="ptr") print(f""" __device__ __forceinline__ thrust::complex<{t.name}> load_relaxed{s.fn_suffix}(const thrust::complex<{t.name}>* ptr) {{ @@ -200,12 +204,12 @@ class type_desc: #if __CUDA_ARCH__ < 700 asm volatile("ld.volatile{s.ptx_space_suffix}.v2{t.ptx_type_suffix} {{%0, %1}}, [%2];" : "={t.val_constraint}"(real_result), "={t.val_constraint}"(imag_result) - : "{s.ptr_constraint}"({ptr_expr}) + : "{s.ptr_constraint}"({const_ptr_expr}) : "memory"); #else asm volatile("ld.relaxed{s.ptx_scope_suffix}{s.ptx_space_suffix}.v2{t.ptx_type_suffix} {{%0, %1}}, [%2];" : "={t.val_constraint}"(real_result), "={t.val_constraint}"(imag_result) - : "{s.ptr_constraint}"({ptr_expr}) + : "{s.ptr_constraint}"({const_ptr_expr}) : "memory"); #endif return thrust::complex<{t.name}>{{real_result, imag_result}}; @@ -218,11 +222,11 @@ class type_desc: auto imag_result = result.imag(); #if __CUDA_ARCH__ < 700 asm volatile("st.volatile{s.ptx_space_suffix}.v2{t.ptx_type_suffix} [%0], {{%1, %2}};" - :: "{s.ptr_constraint}"({ptr_expr}), "{t.val_constraint}"(real_result), "{t.val_constraint}"(imag_result) + :: "{s.ptr_constraint}"({mut_ptr_expr}), "{t.val_constraint}"(real_result), "{t.val_constraint}"(imag_result) : "memory"); #else asm volatile("st.relaxed{s.ptx_scope_suffix}{s.ptx_space_suffix}.v2{t.ptx_type_suffix} [%0], {{%1, %2}};" - :: "{s.ptr_constraint}"({ptr_expr}), "{t.val_constraint}"(real_result), "{t.val_constraint}"(imag_result) + :: "{s.ptr_constraint}"({mut_ptr_expr}), "{t.val_constraint}"(real_result), "{t.val_constraint}"(imag_result) : "memory"); #endif }} From 9ab9633165b4154a8edf30ef9e4e370eb23e39de Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 9 Oct 2023 11:29:35 +0200 Subject: [PATCH 324/583] review updates - update asm type annotations - fix incorrect store Co-authored-by: Yuhsiang M. 
Tsai --- cuda/components/memory.cuh | 64 +++++++++---------- cuda/solver/common_trs_kernels.cuh | 2 +- dev_tools/scripts/generate_cuda_memory_ptx.py | 4 +- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/cuda/components/memory.cuh b/cuda/components/memory.cuh index af3a0e838ea..4d814c7f513 100644 --- a/cuda/components/memory.cuh +++ b/cuda/components/memory.cuh @@ -105,12 +105,12 @@ __device__ __forceinline__ int32 load_relaxed_shared(const int32* ptr) { int32 result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile.shared.b32 %0, [%1];" + asm volatile("ld.volatile.shared.s32 %0, [%1];" : "=r"(result) : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #else - asm volatile("ld.relaxed.cta.shared.b32 %0, [%1];" + asm volatile("ld.relaxed.cta.shared.s32 %0, [%1];" : "=r"(result) : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); @@ -123,12 +123,12 @@ __device__ __forceinline__ int32 load_relaxed_shared(const int32* ptr) __device__ __forceinline__ void store_relaxed_shared(int32* ptr, int32 result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.shared.b32 [%0], %1;" ::"r"( + asm volatile("st.volatile.shared.s32 [%0], %1;" ::"r"( convert_generic_ptr_to_smem_ptr(ptr)), "r"(result) : "memory"); #else - asm volatile("st.relaxed.cta.shared.b32 [%0], %1;" ::"r"( + asm volatile("st.relaxed.cta.shared.s32 [%0], %1;" ::"r"( convert_generic_ptr_to_smem_ptr(ptr)), "r"(result) : "memory"); @@ -140,12 +140,12 @@ __device__ __forceinline__ int64 load_relaxed_shared(const int64* ptr) { int64 result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile.shared.b64 %0, [%1];" + asm volatile("ld.volatile.shared.s64 %0, [%1];" : "=l"(result) : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #else - asm volatile("ld.relaxed.cta.shared.b64 %0, [%1];" + asm volatile("ld.relaxed.cta.shared.s64 %0, [%1];" : "=l"(result) : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); @@ -158,12 +158,12 @@ __device__ __forceinline__ int64 load_relaxed_shared(const int64* ptr) __device__ __forceinline__ void store_relaxed_shared(int64* ptr, int64 result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.shared.b64 [%0], %1;" ::"r"( + asm volatile("st.volatile.shared.s64 [%0], %1;" ::"r"( convert_generic_ptr_to_smem_ptr(ptr)), "l"(result) : "memory"); #else - asm volatile("st.relaxed.cta.shared.b64 [%0], %1;" ::"r"( + asm volatile("st.relaxed.cta.shared.s64 [%0], %1;" ::"r"( convert_generic_ptr_to_smem_ptr(ptr)), "l"(result) : "memory"); @@ -247,12 +247,12 @@ __device__ __forceinline__ int32 load_acquire_shared(const int32* ptr) { int32 result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile.shared.b32 %0, [%1];" + asm volatile("ld.volatile.shared.s32 %0, [%1];" : "=r"(result) : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #else - asm volatile("ld.acquire.cta.shared.b32 %0, [%1];" + asm volatile("ld.acquire.cta.shared.s32 %0, [%1];" : "=r"(result) : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); @@ -266,12 +266,12 @@ __device__ __forceinline__ void store_release_shared(int32* ptr, int32 result) { membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.shared.b32 [%0], %1;" ::"r"( + asm volatile("st.volatile.shared.s32 [%0], %1;" ::"r"( convert_generic_ptr_to_smem_ptr(ptr)), "r"(result) : "memory"); #else - asm volatile("st.release.cta.shared.b32 [%0], %1;" ::"r"( + asm volatile("st.release.cta.shared.s32 [%0], %1;" ::"r"( convert_generic_ptr_to_smem_ptr(ptr)), 
"r"(result) : "memory"); @@ -283,12 +283,12 @@ __device__ __forceinline__ int64 load_acquire_shared(const int64* ptr) { int64 result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile.shared.b64 %0, [%1];" + asm volatile("ld.volatile.shared.s64 %0, [%1];" : "=l"(result) : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); #else - asm volatile("ld.acquire.cta.shared.b64 %0, [%1];" + asm volatile("ld.acquire.cta.shared.s64 %0, [%1];" : "=l"(result) : "r"(convert_generic_ptr_to_smem_ptr(const_cast(ptr))) : "memory"); @@ -302,12 +302,12 @@ __device__ __forceinline__ void store_release_shared(int64* ptr, int64 result) { membar_acq_rel_shared(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.shared.b64 [%0], %1;" ::"r"( + asm volatile("st.volatile.shared.s64 [%0], %1;" ::"r"( convert_generic_ptr_to_smem_ptr(ptr)), "l"(result) : "memory"); #else - asm volatile("st.release.cta.shared.b64 [%0], %1;" ::"r"( + asm volatile("st.release.cta.shared.s64 [%0], %1;" ::"r"( convert_generic_ptr_to_smem_ptr(ptr)), "l"(result) : "memory"); @@ -393,12 +393,12 @@ __device__ __forceinline__ int32 load_relaxed(const int32* ptr) { int32 result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile.b32 %0, [%1];" + asm volatile("ld.volatile.s32 %0, [%1];" : "=r"(result) : "l"(const_cast(ptr)) : "memory"); #else - asm volatile("ld.relaxed.gpu.b32 %0, [%1];" + asm volatile("ld.relaxed.gpu.s32 %0, [%1];" : "=r"(result) : "l"(const_cast(ptr)) : "memory"); @@ -411,10 +411,10 @@ __device__ __forceinline__ int32 load_relaxed(const int32* ptr) __device__ __forceinline__ void store_relaxed(int32* ptr, int32 result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b32 [%0], %1;" ::"l"(ptr), "r"(result) + asm volatile("st.volatile.s32 [%0], %1;" ::"l"(ptr), "r"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.b32 [%0], %1;" ::"l"(ptr), "r"(result) + asm volatile("st.relaxed.gpu.s32 [%0], %1;" ::"l"(ptr), "r"(result) : "memory"); #endif } @@ -424,12 +424,12 @@ __device__ __forceinline__ int64 load_relaxed(const int64* ptr) { int64 result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile.b64 %0, [%1];" + asm volatile("ld.volatile.s64 %0, [%1];" : "=l"(result) : "l"(const_cast(ptr)) : "memory"); #else - asm volatile("ld.relaxed.gpu.b64 %0, [%1];" + asm volatile("ld.relaxed.gpu.s64 %0, [%1];" : "=l"(result) : "l"(const_cast(ptr)) : "memory"); @@ -442,10 +442,10 @@ __device__ __forceinline__ int64 load_relaxed(const int64* ptr) __device__ __forceinline__ void store_relaxed(int64* ptr, int64 result) { #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b64 [%0], %1;" ::"l"(ptr), "l"(result) + asm volatile("st.volatile.s64 [%0], %1;" ::"l"(ptr), "l"(result) : "memory"); #else - asm volatile("st.relaxed.gpu.b64 [%0], %1;" ::"l"(ptr), "l"(result) + asm volatile("st.relaxed.gpu.s64 [%0], %1;" ::"l"(ptr), "l"(result) : "memory"); #endif } @@ -517,12 +517,12 @@ __device__ __forceinline__ int32 load_acquire(const int32* ptr) { int32 result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile.b32 %0, [%1];" + asm volatile("ld.volatile.s32 %0, [%1];" : "=r"(result) : "l"(const_cast(ptr)) : "memory"); #else - asm volatile("ld.acquire.gpu.b32 %0, [%1];" + asm volatile("ld.acquire.gpu.s32 %0, [%1];" : "=r"(result) : "l"(const_cast(ptr)) : "memory"); @@ -536,10 +536,10 @@ __device__ __forceinline__ void store_release(int32* ptr, int32 result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b32 [%0], %1;" ::"l"(ptr), "r"(result) + asm volatile("st.volatile.s32 [%0], %1;" ::"l"(ptr), 
"r"(result) : "memory"); #else - asm volatile("st.release.gpu.b32 [%0], %1;" ::"l"(ptr), "r"(result) + asm volatile("st.release.gpu.s32 [%0], %1;" ::"l"(ptr), "r"(result) : "memory"); #endif } @@ -549,12 +549,12 @@ __device__ __forceinline__ int64 load_acquire(const int64* ptr) { int64 result; #if __CUDA_ARCH__ < 700 - asm volatile("ld.volatile.b64 %0, [%1];" + asm volatile("ld.volatile.s64 %0, [%1];" : "=l"(result) : "l"(const_cast(ptr)) : "memory"); #else - asm volatile("ld.acquire.gpu.b64 %0, [%1];" + asm volatile("ld.acquire.gpu.s64 %0, [%1];" : "=l"(result) : "l"(const_cast(ptr)) : "memory"); @@ -568,10 +568,10 @@ __device__ __forceinline__ void store_release(int64* ptr, int64 result) { membar_acq_rel(); #if __CUDA_ARCH__ < 700 - asm volatile("st.volatile.b64 [%0], %1;" ::"l"(ptr), "l"(result) + asm volatile("st.volatile.s64 [%0], %1;" ::"l"(ptr), "l"(result) : "memory"); #else - asm volatile("st.release.gpu.b64 [%0], %1;" ::"l"(ptr), "l"(result) + asm volatile("st.release.gpu.s64 [%0], %1;" ::"l"(ptr), "l"(result) : "memory"); #endif } diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh index 546b366c6a2..6dbd65968d0 100644 --- a/cuda/solver/common_trs_kernels.cuh +++ b/cuda/solver/common_trs_kernels.cuh @@ -449,7 +449,7 @@ __global__ void sptrsv_naive_caching_kernel( // This check to ensure no infinite loops happen. if (is_nan(r)) { - store_relaxed(x_s + self_shid, zero()); + store_relaxed_shared(x_s + self_shid, zero()); store_relaxed(x + row * x_stride + rhs, zero()); *nan_produced = true; } diff --git a/dev_tools/scripts/generate_cuda_memory_ptx.py b/dev_tools/scripts/generate_cuda_memory_ptx.py index 4cbe05361c1..d75a9f908b8 100755 --- a/dev_tools/scripts/generate_cuda_memory_ptx.py +++ b/dev_tools/scripts/generate_cuda_memory_ptx.py @@ -37,8 +37,8 @@ class type_desc: ordering(ptx_load_suffix=".acquire", fn_load_suffix="_acquire", ptx_store_suffix=".release", fn_store_suffix="_release", is_relaxed=False) ] -types = [type_desc(ptx_type_suffix=".b32", val_constraint="r", name="int32"), - type_desc(ptx_type_suffix=".b64", val_constraint="l", name="int64"), +types = [type_desc(ptx_type_suffix=".s32", val_constraint="r", name="int32"), + type_desc(ptx_type_suffix=".s64", val_constraint="l", name="int64"), type_desc(ptx_type_suffix=".f32", val_constraint="f", name="float"), type_desc(ptx_type_suffix=".f64", val_constraint="d", name="double")] # header From b4c1699abe8c4e13f58ea7ad8947e75fbf6445ba Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 10 Oct 2023 10:50:50 +0200 Subject: [PATCH 325/583] add note to generated file --- cuda/components/memory.cuh | 3 +++ dev_tools/scripts/generate_cuda_memory_ptx.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cuda/components/memory.cuh b/cuda/components/memory.cuh index 4d814c7f513..a1a53284e3f 100644 --- a/cuda/components/memory.cuh +++ b/cuda/components/memory.cuh @@ -43,6 +43,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "cuda/base/types.hpp" +// this file is generated by dev_tools/scripts/generate_cuda_memory_ptx.py + + namespace gko { namespace kernels { namespace cuda { diff --git a/dev_tools/scripts/generate_cuda_memory_ptx.py b/dev_tools/scripts/generate_cuda_memory_ptx.py index d75a9f908b8..42bef50f9a2 100755 --- a/dev_tools/scripts/generate_cuda_memory_ptx.py +++ b/dev_tools/scripts/generate_cuda_memory_ptx.py @@ -87,6 +87,9 @@ class type_desc: #include "cuda/base/types.hpp" +// this file is generated by dev_tools/scripts/generate_cuda_memory_ptx.py + + namespace gko { namespace kernels { namespace cuda { From 03d696ce7ced9c69303f4895617285b3f5076536 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 8 Oct 2023 21:50:29 +0200 Subject: [PATCH 326/583] add reorderings to benchmarks --- benchmark/conversion/conversion.cpp | 2 + .../matrix_statistics/matrix_statistics.cpp | 1 + benchmark/preconditioner/preconditioner.cpp | 1 + benchmark/solver/solver_common.hpp | 12 +++- benchmark/sparse_blas/sparse_blas.cpp | 2 +- benchmark/spmv/spmv_common.hpp | 2 + benchmark/test/preconditioner.py | 8 +++ .../reference/preconditioner.reordered.stderr | 9 +++ .../reference/preconditioner.reordered.stdout | 33 +++++++++ .../test/reference/solver.reordered.stderr | 10 +++ .../test/reference/solver.reordered.stdout | 57 ++++++++++++++++ .../reference/sparse_blas.reordered.stderr | 9 +++ .../reference/sparse_blas.reordered.stdout | 32 +++++++++ .../test/reference/spmv.reordered.stderr | 10 +++ .../test/reference/spmv.reordered.stdout | 22 ++++++ benchmark/test/solver.py | 8 +++ benchmark/test/sparse_blas.py | 11 ++- benchmark/test/spmv.py | 8 +++ benchmark/utils/general_matrix.hpp | 68 +++++++++++++++++++ benchmark/utils/generator.hpp | 27 ++++---- 20 files changed, 315 insertions(+), 17 deletions(-) create mode 100644 benchmark/test/reference/preconditioner.reordered.stderr create mode 100644 benchmark/test/reference/preconditioner.reordered.stdout create mode 100644 benchmark/test/reference/solver.reordered.stderr create mode 100644 benchmark/test/reference/solver.reordered.stdout create mode 100644 benchmark/test/reference/sparse_blas.reordered.stderr create mode 100644 benchmark/test/reference/sparse_blas.reordered.stdout create mode 100644 benchmark/test/reference/spmv.reordered.stderr create mode 100644 benchmark/test/reference/spmv.reordered.stdout diff --git a/benchmark/conversion/conversion.cpp b/benchmark/conversion/conversion.cpp index c777db1a35a..e45046329d7 100644 --- a/benchmark/conversion/conversion.cpp +++ b/benchmark/conversion/conversion.cpp @@ -118,6 +118,8 @@ struct ConversionBenchmark : Benchmark> { { gko::matrix_data data; data = Generator::generate_matrix_data(test_case); + // no reordering here, as it doesn't impact conversions beyond + // dense-sparse conversions std::clog << "Matrix is of size (" << data.size[0] << ", " << data.size[1] << "), " << data.nonzeros.size() << std::endl; test_case["rows"] = data.size[0]; diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index 20feecf5ccf..576d6fa7d52 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -186,6 +186,7 @@ struct MatrixStatistics : Benchmark { json& test_case) const override { auto data = Generator::generate_matrix_data(test_case); + // no reordering here, as it doesn't change statistics std::clog << "Matrix is of size (" << data.size[0] << ", " << data.size[1] << "), " << 
data.nonzeros.size() << std::endl; test_case["rows"] = data.size[0]; diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 074fe202e6c..d81dfaa4d5d 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -183,6 +183,7 @@ struct PreconditionerBenchmark : Benchmark { { preconditioner_benchmark_state state; auto data = Generator::generate_matrix_data(test_case); + reorder(data, test_case); state.system_matrix = formats::matrix_factory(FLAGS_formats, exec, data); diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index 784b70eca61..46b7a231e9a 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" @@ -433,10 +434,17 @@ struct SolverBenchmark : Benchmark> { {std::numeric_limits::quiet_NaN()}, exec); state.x = generator.initialize({0.0}, exec); } else { - state.system_matrix = - generator.generate_matrix_with_optimal_format(exec, test_case); + auto data = generator.generate_matrix_data(test_case); + auto permutation = + reorder(data, test_case, generator.is_distributed()); + + state.system_matrix = generator.generate_matrix_with_format( + exec, test_case["optimal"]["spmv"].get(), data); state.b = generator.generate_rhs(exec, state.system_matrix.get(), test_case); + if (permutation) { + permute(state.b, permutation.get()); + } state.x = generator.generate_initial_guess( exec, state.system_matrix.get(), state.b.get()); } diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index 5d479eb7fc0..5385de4264c 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -114,7 +114,7 @@ struct SparseBlasBenchmark : Benchmark> { json& test_case) const override { auto data = Generator::generate_matrix_data(test_case); - data.ensure_row_major_order(); + reorder(data, test_case); std::clog << "Matrix is of size (" << data.size[0] << ", " << data.size[1] << "), " << data.nonzeros.size() << std::endl; test_case["rows"] = data.size[0]; diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index c85642bb5f1..4d1ab17ccf4 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/runner.hpp" @@ -104,6 +105,7 @@ struct SpmvBenchmark : Benchmark> { { spmv_benchmark_state state; state.data = generator.generate_matrix_data(test_case); + reorder(state.data, test_case, generator.is_distributed()); auto nrhs = FLAGS_nrhs; state.b = generator.create_multi_vector_random( diff --git a/benchmark/test/preconditioner.py b/benchmark/test/preconditioner.py index e05e5b780ac..7226964dd05 100755 --- a/benchmark/test/preconditioner.py +++ b/benchmark/test/preconditioner.py @@ -43,3 +43,11 @@ expected_stdout="preconditioner.profile.stdout", expected_stderr="preconditioner.profile.stderr", ) + +# stdin +test_framework.compare_output( + ["-reorder", "amd"], + expected_stdout="preconditioner.reordered.stdout", + expected_stderr="preconditioner.reordered.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) diff --git a/benchmark/test/reference/preconditioner.reordered.stderr b/benchmark/test/reference/preconditioner.reordered.stderr new file mode 100644 index 00000000000..a428671486f --- /dev/null +++ b/benchmark/test/reference/preconditioner.reordered.stderr @@ -0,0 +1,9 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +Running with preconditioners: none +Running test case stencil(100, 7pt) +Matrix is of size (125, 125), 725 + Running preconditioner: none diff --git a/benchmark/test/reference/preconditioner.reordered.stdout b/benchmark/test/reference/preconditioner.reordered.stdout new file mode 100644 index 00000000000..51adfb3b58b --- /dev/null +++ b/benchmark/test/reference/preconditioner.reordered.stdout @@ -0,0 +1,33 @@ +[ + { + "size": 100, + "stencil": "7pt", + "preconditioner": { + "none": { + "generate": { + "components": { + "generate()": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "apply": { + "components": { + "apply()": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "completed": true + } + }, + "reordered": "amd", + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/solver.reordered.stderr b/benchmark/test/reference/solver.reordered.stderr new file mode 100644 index 00000000000..d9c04b69cf5 --- /dev/null +++ b/benchmark/test/reference/solver.reordered.stderr @@ -0,0 +1,10 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running cg with 1000 iterations and residual goal of 1.000000e-06 +The number of right hand sides is 1 +Running test case stencil(100, 7pt) +Matrix is of size (125, 125) + Running solver: cg diff --git a/benchmark/test/reference/solver.reordered.stdout b/benchmark/test/reference/solver.reordered.stdout new file mode 100644 index 00000000000..c1b826ae3fc --- /dev/null +++ b/benchmark/test/reference/solver.reordered.stdout @@ -0,0 +1,57 @@ +[ + { + "size": 100, + "stencil": "7pt", + "optimal": { + "spmv": "csr" + }, + "solver": { + "cg": { + "recurrent_residuals": [], + "true_residuals": [], + "implicit_residuals": [], + "iteration_timestamps": [], + "rhs_norm": 1.0, + "generate": { + 
"components": { + "generate()": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "time": 1.0 + }, + "apply": { + "components": { + "apply()": 1.0, + "iteration": 1.0, + "allocate": 1.0, + "dense::fill": 1.0, + "cg::initialize": 1.0, + "advanced_apply()": 1.0, + "csr::advanced_spmv": 1.0, + "dense::compute_norm2_dispatch": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "dense::compute_conj_dot_dispatch": 1.0, + "check()": 1.0, + "residual_norm::residual_norm": 1.0, + "cg::step_1": 1.0, + "csr::spmv": 1.0, + "cg::step_2": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "iterations": 7, + "time": 1.0 + }, + "preconditioner": {}, + "residual_norm": 1.0, + "repetitions": 1, + "completed": true + } + }, + "reordered": "amd", + "rows": 125, + "cols": 125 + } +] diff --git a/benchmark/test/reference/sparse_blas.reordered.stderr b/benchmark/test/reference/sparse_blas.reordered.stderr new file mode 100644 index 00000000000..497d5a72bbf --- /dev/null +++ b/benchmark/test/reference/sparse_blas.reordered.stderr @@ -0,0 +1,9 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are symbolic_cholesky +Running test case stencil(100, 7pt) +Matrix is of size (125, 125), 725 + Running sparse_blas: symbolic_cholesky diff --git a/benchmark/test/reference/sparse_blas.reordered.stdout b/benchmark/test/reference/sparse_blas.reordered.stdout new file mode 100644 index 00000000000..b5fc8998be0 --- /dev/null +++ b/benchmark/test/reference/sparse_blas.reordered.stdout @@ -0,0 +1,32 @@ +[ + { + "size": 100, + "stencil": "7pt", + "sparse_blas": { + "symbolic_cholesky": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "components": { + "compute_elim_forest": 1.0, + "allocate": 1.0, + "free": 1.0, + "components::fill_array": 1.0, + "cholesky::symbolic_count": 1.0, + "components::prefix_sum_nonnegative": 1.0, + "copy": 1.0, + "cholesky::symbolic_factorize": 1.0, + "csr::sort_by_column_index": 1.0, + "overhead": 1.0 + }, + "factor_nonzeros": 1324, + "completed": true + } + }, + "reordered": "amd", + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/spmv.reordered.stderr b/benchmark/test/reference/spmv.reordered.stderr new file mode 100644 index 00000000000..97fe670aff7 --- /dev/null +++ b/benchmark/test/reference/spmv.reordered.stderr @@ -0,0 +1,10 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are coo +The number of right hand sides is 1 +Running test case stencil(100, 7pt) +Matrix is of size (125, 125), 725 + Running spmv: coo diff --git a/benchmark/test/reference/spmv.reordered.stdout b/benchmark/test/reference/spmv.reordered.stdout new file mode 100644 index 00000000000..5404235cdf7 --- /dev/null +++ b/benchmark/test/reference/spmv.reordered.stdout @@ -0,0 +1,22 @@ +[ + { + "size": 100, + "stencil": "7pt", + "spmv": { + "coo": { + "storage": 11600, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "reordered": "amd", + "rows": 125, + "cols": 125, + "nonzeros": 725, + "optimal": { + "spmv": "coo" + } + } +] diff --git a/benchmark/test/solver.py b/benchmark/test/solver.py index 025ee92707c..5dd1d840a4e 100755 --- a/benchmark/test/solver.py +++ b/benchmark/test/solver.py @@ -43,3 +43,11 
@@ expected_stdout="solver.profile.stdout", expected_stderr="solver.profile.stderr", ) + +# reordering +test_framework.compare_output( + ["-reorder", "amd"], + expected_stdout="solver.reordered.stdout", + expected_stderr="solver.reordered.stderr", + stdin='[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]', +) diff --git a/benchmark/test/sparse_blas.py b/benchmark/test/sparse_blas.py index 724cdb866f0..8e6cda3c9bd 100755 --- a/benchmark/test/sparse_blas.py +++ b/benchmark/test/sparse_blas.py @@ -4,7 +4,8 @@ # check that all input modes work: # parameter test_framework.compare_output( - ["-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]'], + ["-operations", "transpose", "-input", + '[{"size": 100, "stencil": "7pt"}]'], expected_stdout="sparse_blas.simple.stdout", expected_stderr="sparse_blas.simple.stderr", ) @@ -55,3 +56,11 @@ expected_stdout="sparse_blas.profile.stdout", expected_stderr="sparse_blas.profile.stderr", ) + +# reordering +test_framework.compare_output( + ["-operations", "symbolic_cholesky", "-reorder", "amd"], + expected_stdout="sparse_blas.reordered.stdout", + expected_stderr="sparse_blas.reordered.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) diff --git a/benchmark/test/spmv.py b/benchmark/test/spmv.py index 865f74bb6d0..f6f4a4b5c39 100755 --- a/benchmark/test/spmv.py +++ b/benchmark/test/spmv.py @@ -43,3 +43,11 @@ expected_stdout="spmv.profile.stdout", expected_stderr="spmv.profile.stderr", ) + +# stdin +test_framework.compare_output( + ["-reorder", "amd"], + expected_stdout="spmv.reordered.stdout", + expected_stderr="spmv.reordered.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) diff --git a/benchmark/utils/general_matrix.hpp b/benchmark/utils/general_matrix.hpp index 39d8b5a8107..41b3459bc5a 100644 --- a/benchmark/utils/general_matrix.hpp +++ b/benchmark/utils/general_matrix.hpp @@ -41,12 +41,80 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/general.hpp" +#include "benchmark/utils/generator.hpp" + + +std::string reordering_algorithm_desc = + "Reordering algorithm to apply to the input matrices:\n" + " none - no reordering\n" + " amd - Approximate Minimum Degree reordering algorithm\n" +#if GKO_HAVE_METIS + " nd - Nested Dissection reordering algorithm\n" +#endif + " rcm - Reverse Cuthill-McKee reordering algorithm"; DEFINE_string(input_matrix, "", "Filename of a matrix to be used as the single input. 
Overwrites " "the value of the -input flag"); +DEFINE_string(reorder, "none", reordering_algorithm_desc.c_str()); + + +template +std::unique_ptr> reorder( + gko::matrix_data& data, json& test_case, + bool is_distributed = false) +{ + if (FLAGS_reorder == "none" || is_distributed) { + return nullptr; + } + using Csr = gko::matrix::Csr; + auto ref = gko::ReferenceExecutor::create(); + auto mtx = gko::share(Csr::create(ref)); + mtx->read(data); + std::unique_ptr> perm; + if (FLAGS_reorder == "amd") { + perm = gko::experimental::reorder::Amd::build() + .on(ref) + ->generate(mtx); +#if GKO_HAVE_METIS + } else if (FLAGS_reorder == "nd") { + perm = gko::experimental::reorder::NestedDissection::build() + .on(ref) + ->generate(mtx); +#endif + } else if (FLAGS_reorder == "rcm") { + perm = gko::reorder::Rcm::build() + .on(ref) + ->generate(mtx) + ->get_permutation() + ->clone(); + } else { + throw std::runtime_error{"Unknown reordering algorithm " + + FLAGS_reorder}; + } + mtx->permute(perm)->write(data); + test_case["reordered"] = FLAGS_reorder; + return perm; +} + + +template +void permute(std::unique_ptr>& vec, + const gko::matrix::Permutation* perm) +{ + vec = vec->permute(perm, gko::matrix::permute_mode::rows); +} + + +template +void permute( + std::unique_ptr>& vec, + const gko::matrix::Permutation* perm) +{} + /** * @copydoc initialize_argument_parsing diff --git a/benchmark/utils/generator.hpp b/benchmark/utils/generator.hpp index 3f26ed3f2fc..c280cb1ac72 100644 --- a/benchmark/utils/generator.hpp +++ b/benchmark/utils/generator.hpp @@ -52,20 +52,25 @@ struct DefaultSystemGenerator { using value_type = ValueType; using Vec = vec; + static bool is_distributed() { return false; } + static gko::matrix_data generate_matrix_data( const json& config) { + gko::matrix_data data; if (config.contains("filename")) { std::ifstream in(config["filename"].get()); - return gko::read_generic_raw(in); + data = gko::read_generic_raw(in); } else if (config.contains("stencil")) { - return generate_stencil( + data = generate_stencil( config["stencil"].get(), config["size"].get()); } else { throw std::runtime_error( "No known way to generate matrix data found."); } + data.ensure_row_major_order(); + return data; } static std::string get_example_config() @@ -188,16 +193,19 @@ struct DistributedDefaultSystemGenerator { using Mtx = dist_mtx; using Vec = dist_vec; + static bool is_distributed() { return true; } + gko::matrix_data generate_matrix_data( const json& config) const { + gko::matrix_data data; if (config.contains("filename")) { std::ifstream in(config["filename"].get()); - return gko::read_generic_raw(in); + data = gko::read_generic_raw(in); } else if (config.contains("stencil")) { auto local_size = static_cast( config["size"].get() / comm.size()); - return generate_stencil( + data = generate_stencil( config["stencil"].get(), comm, local_size, config["comm_pattern"].get() == std::string("optimal")); @@ -205,6 +213,8 @@ struct DistributedDefaultSystemGenerator { throw std::runtime_error( "No known way to generate matrix data found."); } + data.ensure_row_major_order(); + return data; } static std::string get_example_config() @@ -240,15 +250,6 @@ struct DistributedDefaultSystemGenerator { } } - std::shared_ptr generate_matrix_with_optimal_format( - std::shared_ptr exec, json& config) const - { - auto data = generate_matrix_data(config); - return generate_matrix_with_format( - std::move(exec), config["optimal"]["spmv"].get(), - data); - } - std::shared_ptr generate_matrix_with_format( std::shared_ptr exec, const 
std::string& format_name, const gko::matrix_data& data, From 99f1eb870d45407ca6bc82a4dbf20c279b431368 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 10 Oct 2023 10:18:00 +0200 Subject: [PATCH 327/583] revert to old permutation interface for now --- benchmark/utils/general_matrix.hpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/benchmark/utils/general_matrix.hpp b/benchmark/utils/general_matrix.hpp index 41b3459bc5a..3d063e91981 100644 --- a/benchmark/utils/general_matrix.hpp +++ b/benchmark/utils/general_matrix.hpp @@ -95,7 +95,9 @@ std::unique_ptr> reorder( throw std::runtime_error{"Unknown reordering algorithm " + FLAGS_reorder}; } - mtx->permute(perm)->write(data); + auto perm_arr = + gko::array::view(ref, data.size[0], perm->get_permutation()); + gko::as(mtx->permute(&perm_arr))->write(data); test_case["reordered"] = FLAGS_reorder; return perm; } @@ -103,16 +105,18 @@ std::unique_ptr> reorder( template void permute(std::unique_ptr>& vec, - const gko::matrix::Permutation* perm) + gko::matrix::Permutation* perm) { - vec = vec->permute(perm, gko::matrix::permute_mode::rows); + auto perm_arr = gko::array::view( + perm->get_executor(), perm->get_size()[0], perm->get_permutation()); + vec = gko::as>(vec->row_permute(&perm_arr)); } template void permute( std::unique_ptr>& vec, - const gko::matrix::Permutation* perm) + gko::matrix::Permutation* perm) {} From 21948574b68f6d9b5d216b1b4bbc1e7a42a80056 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 10 Oct 2023 14:19:57 +0200 Subject: [PATCH 328/583] disable reordering flag for distributed benchmarks Co-authored-by: Yuhsiang M. Tsai --- benchmark/blas/distributed/multi_vector.cpp | 3 +++ benchmark/solver/distributed/solver.cpp | 3 +++ benchmark/solver/solver_common.hpp | 3 +-- benchmark/spmv/distributed/spmv.cpp | 3 +++ benchmark/spmv/spmv_common.hpp | 2 +- benchmark/utils/general_matrix.hpp | 17 +++++++++++++---- benchmark/utils/generator.hpp | 4 ---- 7 files changed, 24 insertions(+), 11 deletions(-) diff --git a/benchmark/blas/distributed/multi_vector.cpp b/benchmark/blas/distributed/multi_vector.cpp index d95e5fb38ac..fe5eea5a38c 100644 --- a/benchmark/blas/distributed/multi_vector.cpp +++ b/benchmark/blas/distributed/multi_vector.cpp @@ -38,6 +38,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#define GKO_BENCHMARK_DISTRIBUTED + + #include "benchmark/blas/blas_common.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/generator.hpp" diff --git a/benchmark/solver/distributed/solver.cpp b/benchmark/solver/distributed/solver.cpp index d691309ab6a..6577c12e52e 100644 --- a/benchmark/solver/distributed/solver.cpp +++ b/benchmark/solver/distributed/solver.cpp @@ -39,6 +39,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#define GKO_BENCHMARK_DISTRIBUTED + + #include "benchmark/solver/solver_common.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index 46b7a231e9a..b19d00cd519 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -435,8 +435,7 @@ struct SolverBenchmark : Benchmark> { state.x = generator.initialize({0.0}, exec); } else { auto data = generator.generate_matrix_data(test_case); - auto permutation = - reorder(data, test_case, generator.is_distributed()); + auto permutation = reorder(data, test_case); state.system_matrix = generator.generate_matrix_with_format( exec, test_case["optimal"]["spmv"].get(), data); diff --git a/benchmark/spmv/distributed/spmv.cpp b/benchmark/spmv/distributed/spmv.cpp index 202aad15c7e..d3925dabcf2 100644 --- a/benchmark/spmv/distributed/spmv.cpp +++ b/benchmark/spmv/distributed/spmv.cpp @@ -43,6 +43,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#define GKO_BENCHMARK_DISTRIBUTED + + #include "benchmark/spmv/spmv_common.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index 4d1ab17ccf4..1d43e3ed327 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -105,7 +105,7 @@ struct SpmvBenchmark : Benchmark> { { spmv_benchmark_state state; state.data = generator.generate_matrix_data(test_case); - reorder(state.data, test_case, generator.is_distributed()); + reorder(state.data, test_case); auto nrhs = FLAGS_nrhs; state.b = generator.create_multi_vector_random( diff --git a/benchmark/utils/general_matrix.hpp b/benchmark/utils/general_matrix.hpp index 3d063e91981..2efbec77f99 100644 --- a/benchmark/utils/general_matrix.hpp +++ b/benchmark/utils/general_matrix.hpp @@ -51,22 +51,27 @@ std::string reordering_algorithm_desc = #if GKO_HAVE_METIS " nd - Nested Dissection reordering algorithm\n" #endif - " rcm - Reverse Cuthill-McKee reordering algorithm"; + " rcm - Reverse Cuthill-McKee reordering algorithm\n" + "This is a preprocessing step whose runtime will not be included\n" + "in the measurements."; DEFINE_string(input_matrix, "", "Filename of a matrix to be used as the single input. 
Overwrites " "the value of the -input flag"); + +#ifndef GKO_BENCHMARK_DISTRIBUTED DEFINE_string(reorder, "none", reordering_algorithm_desc.c_str()); +#endif template std::unique_ptr> reorder( - gko::matrix_data& data, json& test_case, - bool is_distributed = false) + gko::matrix_data& data, json& test_case) { - if (FLAGS_reorder == "none" || is_distributed) { +#ifndef GKO_BENCHMARK_DISTRIBUTED + if (FLAGS_reorder == "none") { return nullptr; } using Csr = gko::matrix::Csr; @@ -100,6 +105,10 @@ std::unique_ptr> reorder( gko::as(mtx->permute(&perm_arr))->write(data); test_case["reordered"] = FLAGS_reorder; return perm; +#else + // no reordering for distributed benchmarks + return nullptr; +#endif } diff --git a/benchmark/utils/generator.hpp b/benchmark/utils/generator.hpp index c280cb1ac72..3491fb0fc2c 100644 --- a/benchmark/utils/generator.hpp +++ b/benchmark/utils/generator.hpp @@ -52,8 +52,6 @@ struct DefaultSystemGenerator { using value_type = ValueType; using Vec = vec; - static bool is_distributed() { return false; } - static gko::matrix_data generate_matrix_data( const json& config) { @@ -193,8 +191,6 @@ struct DistributedDefaultSystemGenerator { using Mtx = dist_mtx; using Vec = dist_vec; - static bool is_distributed() { return true; } - gko::matrix_data generate_matrix_data( const json& config) const { From 11001e6a2c6c93ade942a68f34a1634c16bb3947 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 4 Aug 2023 10:57:22 +0200 Subject: [PATCH 329/583] Add a batch::BatchLinOp hierarchy and core tests Co-authored-by: Aditya Kashi --- core/test/base/CMakeLists.txt | 1 + core/test/base/batch_lin_op.cpp | 405 ++++++++++++++++ include/ginkgo/core/base/batch_lin_op.hpp | 439 ++++++++++++++++++ .../ginkgo/core/base/batch_lin_op_helpers.hpp | 202 ++++++++ .../ginkgo/core/base/exception_helpers.hpp | 179 +++++++ include/ginkgo/core/log/logger.hpp | 118 ++++- include/ginkgo/ginkgo.hpp | 3 + 7 files changed, 1339 insertions(+), 8 deletions(-) create mode 100644 core/test/base/batch_lin_op.cpp create mode 100644 include/ginkgo/core/base/batch_lin_op.hpp create mode 100644 include/ginkgo/core/base/batch_lin_op_helpers.hpp diff --git a/core/test/base/CMakeLists.txt b/core/test/base/CMakeLists.txt index 36bad656b07..4fa00e12922 100644 --- a/core/test/base/CMakeLists.txt +++ b/core/test/base/CMakeLists.txt @@ -2,6 +2,7 @@ ginkgo_create_test(abstract_factory) ginkgo_create_test(allocator) ginkgo_create_test(array) ginkgo_create_test(batch_dim) +ginkgo_create_test(batch_lin_op) ginkgo_create_test(batch_multi_vector) ginkgo_create_test(dense_cache) ginkgo_create_test(combination) diff --git a/core/test/base/batch_lin_op.cpp b/core/test/base/batch_lin_op.cpp new file mode 100644 index 00000000000..1fe1765987f --- /dev/null +++ b/core/test/base/batch_lin_op.cpp @@ -0,0 +1,405 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include + + +namespace { + + +class DummyBatchLinOp : public gko::batch::EnableBatchLinOp, + public gko::EnableCreateMethod { +public: + DummyBatchLinOp(std::shared_ptr exec, + gko::batch_dim<2> size = gko::batch_dim<2>{}) + : gko::batch::EnableBatchLinOp(exec, size) + {} + + void access() const { last_access = this->get_executor(); } + + mutable std::shared_ptr last_access; + mutable std::shared_ptr last_b_access; + mutable std::shared_ptr last_x_access; + mutable std::shared_ptr last_alpha_access; + mutable std::shared_ptr last_beta_access; + +protected: + void apply_impl(const gko::batch::BatchLinOp* b, + gko::batch::BatchLinOp* x) const override + { + this->access(); + static_cast(b)->access(); + static_cast(x)->access(); + last_b_access = b->get_executor(); + last_x_access = x->get_executor(); + } + + void apply_impl(const gko::batch::BatchLinOp* alpha, + const gko::batch::BatchLinOp* b, + const gko::batch::BatchLinOp* beta, + gko::batch::BatchLinOp* x) const override + { + this->access(); + static_cast(alpha)->access(); + static_cast(b)->access(); + static_cast(beta)->access(); + static_cast(x)->access(); + last_alpha_access = alpha->get_executor(); + last_b_access = b->get_executor(); + last_beta_access = beta->get_executor(); + last_x_access = x->get_executor(); + } +}; + + +class EnableBatchLinOp : public ::testing::Test { +protected: + EnableBatchLinOp() + : ref{gko::ReferenceExecutor::create()}, + ref2{gko::ReferenceExecutor::create()}, + op{DummyBatchLinOp::create(ref2, + gko::batch_dim<2>(1, gko::dim<2>{3, 5}))}, + op2{DummyBatchLinOp::create(ref2, + gko::batch_dim<2>(2, gko::dim<2>{3, 5}))}, + alpha{DummyBatchLinOp::create( + ref, gko::batch_dim<2>(1, gko::dim<2>{1, 1}))}, + alpha2{DummyBatchLinOp::create( + ref, gko::batch_dim<2>(2, gko::dim<2>{1, 1}))}, + beta{DummyBatchLinOp::create( + ref, gko::batch_dim<2>(1, gko::dim<2>{1, 1}))}, + beta2{DummyBatchLinOp::create( + ref, gko::batch_dim<2>(2, gko::dim<2>{1, 1}))}, + b{DummyBatchLinOp::create(ref, + gko::batch_dim<2>(1, gko::dim<2>{5, 4}))}, + b2{DummyBatchLinOp::create(ref, + gko::batch_dim<2>(2, gko::dim<2>{5, 4}))}, + x{DummyBatchLinOp::create(ref, + gko::batch_dim<2>(1, gko::dim<2>{3, 4}))}, + x2{DummyBatchLinOp::create(ref, + gko::batch_dim<2>(2, gko::dim<2>{3, 4}))} + {} + + std::shared_ptr ref; + std::shared_ptr ref2; + std::unique_ptr op; + std::unique_ptr op2; + std::unique_ptr alpha; + std::unique_ptr alpha2; + std::unique_ptr 
beta; + std::unique_ptr beta2; + std::unique_ptr b; + std::unique_ptr b2; + std::unique_ptr x; + std::unique_ptr x2; +}; + + +TEST_F(EnableBatchLinOp, KnowsNumBatchItems) +{ + ASSERT_EQ(op->get_num_batch_items(), 1); + ASSERT_EQ(op2->get_num_batch_items(), 2); +} + + +TEST_F(EnableBatchLinOp, KnowsItsSizes) +{ + auto op1_sizes = gko::batch_dim<2>(1, gko::dim<2>{3, 5}); + auto op2_sizes = gko::batch_dim<2>(2, gko::dim<2>{3, 5}); + ASSERT_EQ(op->get_size(), op1_sizes); + ASSERT_EQ(op2->get_size(), op2_sizes); +} + + +TEST_F(EnableBatchLinOp, CallsApplyImpl) +{ + op->apply(b, x); + + ASSERT_EQ(op->last_access, ref2); +} + + +TEST_F(EnableBatchLinOp, CallsApplyImplForBatch) +{ + op2->apply(b2, x2); + + ASSERT_EQ(op2->last_access, ref2); +} + + +TEST_F(EnableBatchLinOp, CallsExtendedApplyImpl) +{ + op->apply(alpha, b, beta, x); + + ASSERT_EQ(op->last_access, ref2); +} + + +TEST_F(EnableBatchLinOp, CallsExtendedApplyImplBatch) +{ + op2->apply(alpha2, b2, beta2, x2); + + ASSERT_EQ(op2->last_access, ref2); +} + + +TEST_F(EnableBatchLinOp, ApplyFailsOnWrongBatchSize) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 4})); + + ASSERT_THROW(op->apply(wrong, x), gko::DimensionMismatch); +} + + +TEST_F(EnableBatchLinOp, ApplyFailsOnWrongNumBatchItems) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 4})); + + ASSERT_THROW(op2->apply(wrong, x2), gko::ValueMismatch); +} + + +TEST_F(EnableBatchLinOp, ApplyFailsOnWrongSolutionRows) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{5, 4})); + + ASSERT_THROW(op->apply(b, wrong), gko::DimensionMismatch); +} + + +TEST_F(EnableBatchLinOp, ApplyFailsOnOneBatchItemWrongSolutionRows) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(2, gko::dim<2>{5, 4})); + + ASSERT_THROW(op2->apply(b2, wrong), gko::DimensionMismatch); +} + + +TEST_F(EnableBatchLinOp, ApplyFailsOnWrongSolutionColumns) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 5})); + + ASSERT_THROW(op->apply(b, wrong), gko::DimensionMismatch); +} + + +TEST_F(EnableBatchLinOp, ApplyFailsOnOneBatchItemWrongSolutionColumn) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(2, gko::dim<2>{3, 5})); + + ASSERT_THROW(op2->apply(b2, wrong), gko::DimensionMismatch); +} + + +TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongBatchSize) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 4})); + + ASSERT_THROW(op->apply(alpha, wrong, beta, x), gko::DimensionMismatch); +} + + +TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongSolutionRows) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{5, 4})); + + ASSERT_THROW(op->apply(alpha, b, beta, wrong), gko::DimensionMismatch); +} + + +TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongSolutionColumns) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 5})); + + ASSERT_THROW(op->apply(alpha, b, beta, wrong), gko::DimensionMismatch); +} + + +TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongAlphaDimension) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{2, 5})); + + ASSERT_THROW(op->apply(wrong, b, beta, x), gko::DimensionMismatch); +} + + +TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongBetaDimension) +{ + auto wrong = + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{2, 5})); + + ASSERT_THROW(op->apply(alpha, b, wrong, x), gko::DimensionMismatch); +} + + 
+TEST_F(EnableBatchLinOp, ApplyDoesNotCopyBetweenSameMemory) +{ + op->apply(b, x); + + ASSERT_EQ(op->last_b_access, ref); + ASSERT_EQ(op->last_x_access, ref); +} + + +TEST_F(EnableBatchLinOp, ApplyNoCopyBackBetweenSameMemory) +{ + op->apply(b, x); + + ASSERT_EQ(b->last_access, ref); + ASSERT_EQ(x->last_access, ref); +} + + +TEST_F(EnableBatchLinOp, ExtendedApplyDoesNotCopyBetweenSameMemory) +{ + op->apply(alpha, b, beta, x); + + ASSERT_EQ(op->last_alpha_access, ref); + ASSERT_EQ(op->last_b_access, ref); + ASSERT_EQ(op->last_beta_access, ref); + ASSERT_EQ(op->last_x_access, ref); +} + + +TEST_F(EnableBatchLinOp, ExtendedApplyNoCopyBackBetweenSameMemory) +{ + op->apply(alpha, b, beta, x); + + ASSERT_EQ(alpha->last_access, ref); + ASSERT_EQ(b->last_access, ref); + ASSERT_EQ(beta->last_access, ref); + ASSERT_EQ(x->last_access, ref); +} + + +template +class DummyBatchLinOpWithFactory + : public gko::batch::EnableBatchLinOp> { +public: + DummyBatchLinOpWithFactory(std::shared_ptr exec) + : gko::batch::EnableBatchLinOp(exec) + {} + + GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) + { + T GKO_FACTORY_PARAMETER_SCALAR(value, T{5}); + }; + GKO_ENABLE_BATCH_LIN_OP_FACTORY(DummyBatchLinOpWithFactory, parameters, + Factory); + GKO_ENABLE_BUILD_METHOD(Factory); + + DummyBatchLinOpWithFactory(const Factory* factory, + std::shared_ptr op) + : gko::batch::EnableBatchLinOp( + factory->get_executor()), + parameters_{factory->get_parameters()}, + op_{op} + {} + + std::shared_ptr op_; + +protected: + void apply_impl(const gko::batch::BatchLinOp* b, + gko::batch::BatchLinOp* x) const override + {} + + void apply_impl(const gko::batch::BatchLinOp* alpha, + const gko::batch::BatchLinOp* b, + const gko::batch::BatchLinOp* beta, + gko::batch::BatchLinOp* x) const override + {} +}; + + +class EnableBatchLinOpFactory : public ::testing::Test { +protected: + EnableBatchLinOpFactory() : ref{gko::ReferenceExecutor::create()} {} + + std::shared_ptr ref; +}; + + +TEST_F(EnableBatchLinOpFactory, CreatesDefaultFactory) +{ + auto factory = DummyBatchLinOpWithFactory<>::build().on(ref); + + ASSERT_EQ(factory->get_parameters().value, 5); + ASSERT_EQ(factory->get_executor(), ref); +} + + +TEST_F(EnableBatchLinOpFactory, CreatesFactoryWithParameters) +{ + auto factory = DummyBatchLinOpWithFactory<>::build().with_value(7).on(ref); + + ASSERT_EQ(factory->get_parameters().value, 7); + ASSERT_EQ(factory->get_executor(), ref); +} + + +TEST_F(EnableBatchLinOpFactory, PassesParametersToBatchLinOp) +{ + auto dummy = gko::share( + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 5}))); + auto factory = DummyBatchLinOpWithFactory<>::build().with_value(6).on(ref); + + auto op = factory->generate(dummy); + + ASSERT_EQ(op->get_executor(), ref); + ASSERT_EQ(op->get_parameters().value, 6); + ASSERT_EQ(op->op_.get(), dummy.get()); +} + + +} // namespace diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp new file mode 100644 index 00000000000..68a88027904 --- /dev/null +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -0,0 +1,439 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HPP_ +#define GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HPP_ + + +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace batch { + + +/** + * @addtogroup BatchLinOp + * + * @section batch_linop_concept Batched Linear operator as a concept + * + * A batch linear operator (BatchLinOp) forms the base class for all batched + * linear algebra objects. In general, it follows the same structure as the + * LinOp class, but has some crucial differences which make it not strictly + * representable through or with the LinOp class. + * + * A batched operator is defined as a set of independent linear operators which + * have no communication/information exchange between them. Therefore, any + * collective operations between the batches is not possible and not + * implemented. This allows for each batch to be computed and operated on in an + * embarrasingly parallel fashion. + * + * Similar to the LinOp class, the BatchLinOp also implements + * BatchLinOp::apply() methods which call the internal apply_impl() methods + * which the concrete BatchLinOp's have to implement. + * + * A key difference between the LinOp and the BatchLinOp classes is the storing + * of dimensions. BatchLinOp allows for storing non-equal objects in the + * batches and hence stores a batch_dim object instead of a dim object. The + * batch_dim object is optimized to store less amount of data when storing + * uniform batches. + * + * All size validation functions again verify first that the number of batches + * are conformant and that the dimensions in the corresponding batches + * themselves are also valid/conformant. Here too, optimizations for uniform + * batches have been added. + * + * @ref BatchLinOp + */ +class BatchLinOp : public EnableAbstractPolymorphicObject { +public: + /** + * Applies a batch linear operator to a batch vector (or a sequence of batch + * of vectors). + * + * Performs the operation x = op(b), where op is this batch linear operator. 
+ * + * @param b the input batch vector(s) on which the batch operator is + * applied + * @param x the output batch vector(s) where the result is stored + * + * @return this + */ + BatchLinOp* apply(ptr_param b, ptr_param x) + { + this->template log( + this, b.get(), x.get()); + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + this->template log( + this, b.get(), x.get()); + return this; + } + + /** + * @copydoc apply(const BatchLinOp *, BatchLinOp *) + */ + const BatchLinOp* apply(ptr_param b, + ptr_param x) const + { + this->template log( + this, b.get(), x.get()); + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + this->template log( + this, b.get(), x.get()); + return this; + } + + /** + * Performs the operation x = alpha * op(b) + beta * x. + * + * @param alpha scaling of the result of op(b) + * @param b vector(s) on which the operator is applied + * @param beta scaling of the input x + * @param x output vector(s) + * + * @return this + */ + BatchLinOp* apply(ptr_param alpha, + ptr_param b, + ptr_param beta, ptr_param x) + { + this->template log( + this, alpha.get(), b.get(), beta.get(), x.get()); + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + this->template log( + this, alpha.get(), b.get(), beta.get(), x.get()); + return this; + } + + /** + * @copydoc apply(const BatchLinOp *, const BatchLinOp *, const BatchLinOp + * *, BatchLinOp *) + */ + const BatchLinOp* apply(ptr_param alpha, + ptr_param b, + ptr_param beta, + ptr_param x) const + { + this->template log( + this, alpha.get(), b.get(), beta.get(), x.get()); + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + this->template log( + this, alpha.get(), b.get(), beta.get(), x.get()); + return this; + } + + /** + * Returns the number of batches in the batch operator. + * + * @return number of batches in the batch operator + */ + size_type get_num_batch_items() const noexcept + { + return size_.get_num_batch_items(); + } + + /** + * Sets the size of the batch operator. + * + * @param size to be set + */ + void set_size(const batch_dim<2>& size) { size_ = size; } + + /** + * Returns the size of the batch operator. + * + * @return size of the batch operator + */ + const batch_dim<2>& get_size() const noexcept { return size_; } + +protected: + /** + * Creates a batch operator with uniform batches. + * + * @param exec the executor where all the operations are performed + * @param num_batch_items the number of batches to be stored in the + * operator + * @param size the size of on of the operator in the batched operator + */ + explicit BatchLinOp(std::shared_ptr exec, + const size_type num_batch_items = 0, + const dim<2>& common_size = dim<2>{}) + : EnableAbstractPolymorphicObject(exec), + size_{num_batch_items > 0 ? 
batch_dim<2>(num_batch_items, common_size) + : batch_dim<2>{}} + {} + + /** + * Creates a batch operator. + * + * @param exec the executor where all the operations are performed + * @param batch_size the sizes of the batch operator stored as a batch_dim + */ + explicit BatchLinOp(std::shared_ptr exec, + const batch_dim<2>& batch_size) + : EnableAbstractPolymorphicObject(exec), size_{batch_size} + {} + + /** + * Implementers of BatchLinOp should override this function instead + * of apply(const BatchLinOp *, BatchLinOp *). + * + * Performs the operation x = op(b), where op is this linear operator. + * + * @param b the input batch vector(s) on which the operator is applied + * @param x the output batch vector(s) where the result is stored + */ + virtual void apply_impl(const BatchLinOp* b, BatchLinOp* x) const = 0; + + /** + * Implementers of BatchLinOp should override this function instead + * of apply(const BatchLinOp *, const BatchLinOp *, const BatchLinOp *, + * BatchLinOp *). + * + * @param alpha scaling of the result of op(b) + * @param b vector(s) on which the operator is applied + * @param beta scaling of the input x + * @param x output vector(s) + */ + virtual void apply_impl(const BatchLinOp* alpha, const BatchLinOp* b, + const BatchLinOp* beta, BatchLinOp* x) const = 0; + + /** + * Throws a DimensionMismatch exception if the parameters to `apply` are of + * the wrong size. + * + * @param b batch vector(s) on which the operator is applied + * @param x output batch vector(s) + */ + void validate_application_parameters(const BatchLinOp* b, + const BatchLinOp* x) const + { + GKO_ASSERT_BATCH_CONFORMANT(this, b); + GKO_ASSERT_BATCH_EQUAL_ROWS(this, x); + GKO_ASSERT_BATCH_EQUAL_COLS(b, x); + } + + /** + * Throws a DimensionMismatch exception if the parameters to `apply` are of + * the wrong size. + * + * @param alpha scaling of the result of op(b) + * @param b batch vector(s) on which the operator is applied + * @param beta scaling of the input x + * @param x output batch vector(s) + */ + void validate_application_parameters(const BatchLinOp* alpha, + const BatchLinOp* b, + const BatchLinOp* beta, + const BatchLinOp* x) const + { + this->validate_application_parameters(b, x); + GKO_ASSERT_BATCH_EQUAL_ROWS( + alpha, batch_dim<2>(b->get_num_batch_items(), dim<2>(1, 1))); + GKO_ASSERT_BATCH_EQUAL_ROWS( + beta, batch_dim<2>(b->get_num_batch_items(), dim<2>(1, 1))); + } + +private: + batch_dim<2> size_{}; +}; + + +/** + * A BatchLinOpFactory represents a higher order mapping which transforms one + * batch linear operator into another. + * + * In a similar fashion to LinOps, BatchLinOps are also "generated" from the + * BatchLinOpFactory. A function of this class is to provide a generate method, + * which internally cals the generate_impl(), which the concrete BatchLinOps + * have to implement. 
+ * + * Example: using BatchCG in Ginkgo + * --------------------------- + * + * ```c++ + * // Suppose A is a batch matrix, batch_b a batch rhs vector, and batch_x an + * // initial guess + * // Create a BatchCG which runs for at most 1000 iterations, and stops after + * // reducing the residual norm by 6 orders of magnitude + * auto batch_cg_factory = solver::BatchCg<>::build() + * .with_max_iters(1000) + * .with_rel_residual_goal(1e-6) + * .on(cuda); + * // create a batch linear operator which represents the solver + * auto batch_cg = batch_cg_factory->generate(A); + * // solve the system + * batch_cg->apply(gko::lend(batch_b), gko::lend(batch_x)); + * ``` + * + * @ingroup BatchLinOp + */ +class BatchLinOpFactory + : public AbstractFactory> { +public: + using AbstractFactory>::AbstractFactory; + + std::unique_ptr generate( + std::shared_ptr input) const + { + this->template log( + this, input.get()); + const auto exec = this->get_executor(); + std::unique_ptr generated; + if (input->get_executor() == exec) { + generated = this->AbstractFactory::generate(input); + } else { + generated = + this->AbstractFactory::generate(gko::clone(exec, input)); + } + this->template log( + this, input.get(), generated.get()); + return generated; + } +}; + + +/** + * The EnableBatchLinOp mixin can be used to provide sensible default + * implementations of the majority of the BatchLinOp and PolymorphicObject + * interface. + * + * The goal of the mixin is to facilitate the development of new BatchLinOp, by + * enabling the implementers to focus on the important parts of their operator, + * while the library takes care of generating the trivial utility functions. + * The mixin will provide default implementations for the entire + * PolymorphicObject interface, including a default implementation of + * `copy_from` between objects of the new BatchLinOp type. It will also hide the + * default BatchLinOp::apply() methods with versions that preserve the static + * type of the object. + * + * Implementers of new BatchLinOps are required to specify only the following + * aspects: + * + * 1. Creation of the BatchLinOp: This can be facilitated via either + * EnableCreateMethod mixin (used mostly for matrix formats), + * or GKO_ENABLE_BATCH_LIN_OP_FACTORY macro (used for operators created from + * other operators, like preconditioners and solvers). + * 2. Application of the BatchLinOp: Implementers have to override the two + * overloads of the BatchLinOp::apply_impl() virtual methods. 
+ * + * @tparam ConcreteBatchLinOp the concrete BatchLinOp which is being + * implemented [CRTP parameter] + * @tparam PolymorphicBase parent of ConcreteBatchLinOp in the polymorphic + * hierarchy, has to be a subclass of BatchLinOp + * + * @ingroup BatchLinOp + */ +template +class EnableBatchLinOp + : public EnablePolymorphicObject, + public EnablePolymorphicAssignment { +public: + using EnablePolymorphicObject::EnablePolymorphicObject; + + const ConcreteBatchLinOp* apply(ptr_param b, + ptr_param x) const + { + PolymorphicBase::apply(b, x); + return self(); + } + + ConcreteBatchLinOp* apply(ptr_param b, + ptr_param x) + { + PolymorphicBase::apply(b, x); + return self(); + } + + const ConcreteBatchLinOp* apply(ptr_param alpha, + ptr_param b, + ptr_param beta, + ptr_param x) const + { + PolymorphicBase::apply(alpha, b, beta, x); + return self(); + } + + ConcreteBatchLinOp* apply(ptr_param alpha, + ptr_param b, + ptr_param beta, + ptr_param x) + { + PolymorphicBase::apply(alpha, b, beta, x); + return self(); + } + +protected: + GKO_ENABLE_SELF(ConcreteBatchLinOp); +}; + + +} // namespace batch +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HPP_ diff --git a/include/ginkgo/core/base/batch_lin_op_helpers.hpp b/include/ginkgo/core/base/batch_lin_op_helpers.hpp new file mode 100644 index 00000000000..579411e9af0 --- /dev/null +++ b/include/ginkgo/core/base/batch_lin_op_helpers.hpp @@ -0,0 +1,202 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ +#define GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ + + +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace batch { + + +/** + * This is an alias for the EnableDefaultFactory mixin, which correctly sets the + * template parameters to enable a subclass of BatchLinOpFactory. 
+ * + * @tparam ConcreteFactory the concrete factory which is being implemented + * [CRTP parmeter] + * @tparam ConcreteLinOp the concrete BatchLinOp type which this factory + * produces, needs to have a constructor which takes a const ConcreteFactory *, + * and an std::shared_ptr as parameters. + * @tparam ParametersType a subclass of enable_parameters_type template which + * defines all of the parameters of the factory + * @tparam PolymorphicBase parent of ConcreteFactory in the polymorphic + * hierarchy, has to be a subclass of LinOpFactory + * + * @ingroup BatchLinOp + */ +template +using EnableDefaultBatchLinOpFactory = + EnableDefaultFactory; + + +/** + * This macro will generate a default implementation of a BatchLinOpFactory for + * the BatchLinOp subclass it is defined in. + * + * It is required to first call the macro #GKO_CREATE_FACTORY_PARAMETERS() + * before this one in order to instantiate the parameters type first. + * + * The list of parameters for the factory should be defined in a code block + * after the macro definition, and should contain a list of + * GKO_FACTORY_PARAMETER_* declarations. The class should provide a constructor + * with signature + * _batch_lin_op(const _factory_name *, std::shared_ptr) + * which the factory will use a callback to construct the object. + * + * A minimal example of a batch linear operator is the following: + * + * ```c++ + * struct MyBatchLinOp : public EnableBatchLinOp { + * GKO_ENABLE_BATCH_LIN_OP_FACTORY(MyBatchLinOp, my_parameters, Factory) { + * // a factory parameter named "my_value", of type int and default + * // value of 5 + * int GKO_FACTORY_PARAMETER_SCALAR(my_value, 5); + * // a factory parameter named `my_pair` of type `std::pair` + * // and default value {5, 5} + * std::pair GKO_FACTORY_PARAMETER_VECTOR(my_pair, 5, 5); + * }; + * // constructor needed by EnableBatchLinOp + * explicit MyBatchLinOp(std::shared_ptr exec) { + * : EnableBatchLinOp(exec) {} + * // constructor needed by the factory + * explicit MyBatchLinOp(const Factory *factory, + * std::shared_ptr matrix) + * : EnableBatchLinOp(factory->get_executor()), + * matrix->get_size()), + * // store factory's parameters locally + * my_parameters_{factory->get_parameters()} + * { + * int value = my_parameters_.my_value; + * // do something with value + * } + * ``` + * + * MyBatchLinOp can then be created as follows: + * + * ```c++ + * auto exec = gko::ReferenceExecutor::create(); + * // create a factory with default `my_value` parameter + * auto fact = MyBatchLinOp::build().on(exec); + * // create a operator using the factory: + * auto my_op = fact->generate(gko::matrix::BatchIdentity::create(exec, 2)); + * std::cout << my_op->get_my_parameters().my_value; // prints 5 + * + * // create a factory with custom `my_value` parameter + * auto fact = MyLinOp::build().with_my_value(0).on(exec); + * // create a operator using the factory: + * auto my_op = fact->generate(gko::matrix::BatchIdentity::create(exec, 2)); + * std::cout << my_op->get_my_parameters().my_value; // prints 0 + * ``` + * + * @note It is possible to combine both the #GKO_CREATE_FACTORY_PARAMETER_*() + * macros with this one in a unique macro for class __templates__ (not with + * regular classes). Splitting this into two distinct macros allows to use them + * in all contexts. See for more + * details. 
+ * + * @param _lin_op concrete operator for which the factory is to be created + * [CRTP parameter] + * @param _parameters_name name of the parameters member in the class + * (its type is `<_parameters_name>_type`, the + * protected member's name is `<_parameters_name>_`, + * and the public getter's name is + * `get_<_parameters_name>()`) + * @param _factory_name name of the generated factory type + * + * @ingroup BatchLinOp + */ +#define GKO_ENABLE_BATCH_LIN_OP_FACTORY(_batch_lin_op, _parameters_name, \ + _factory_name) \ +public: \ + const _parameters_name##_type& get_##_parameters_name() const \ + { \ + return _parameters_name##_; \ + } \ + \ + class _factory_name \ + : public ::gko::batch::EnableDefaultBatchLinOpFactory< \ + _factory_name, _batch_lin_op, _parameters_name##_type> { \ + friend class ::gko::EnablePolymorphicObject< \ + _factory_name, ::gko::batch::BatchLinOpFactory>; \ + friend class ::gko::enable_parameters_type<_parameters_name##_type, \ + _factory_name>; \ + explicit _factory_name(std::shared_ptr exec) \ + : ::gko::batch::EnableDefaultBatchLinOpFactory< \ + _factory_name, _batch_lin_op, _parameters_name##_type>( \ + std::move(exec)) \ + {} \ + explicit _factory_name(std::shared_ptr exec, \ + const _parameters_name##_type& parameters) \ + : ::gko::batch::EnableDefaultBatchLinOpFactory< \ + _factory_name, _batch_lin_op, _parameters_name##_type>( \ + std::move(exec), parameters) \ + {} \ + }; \ + friend ::gko::batch::EnableDefaultBatchLinOpFactory< \ + _factory_name, _batch_lin_op, _parameters_name##_type>; \ + \ + \ +private: \ + _parameters_name##_type _parameters_name##_; \ + \ +public: \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + +} // namespace batch +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index 4a12865f374..a45e19612b2 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -147,6 +148,22 @@ inline dim<2> get_size(const T& op) inline dim<2> get_size(const dim<2>& size) { return size; } +template +inline batch_dim<2> get_batch_size(const T& op) +{ + return op->get_size(); +} + +inline batch_dim<2> get_batch_size(const batch_dim<2>& size) { return size; } + + +template +inline size_type get_num_batch_items(const T& obj) +{ + return obj.get_num_batch_items(); +} + + } // namespace detail @@ -298,6 +315,168 @@ inline dim<2> get_size(const dim<2>& size) { return size; } } +/** + * Asserts that _op1 can be applied to _op2. + * + * @throw DimensionMismatch if _op1 cannot be applied to _op2. 
+ */ +#define GKO_ASSERT_BATCH_CONFORMANT(_op1, _op2) \ + { \ + auto equal_num_items = \ + ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ + auto equal_inner_size = \ + ::gko::detail::get_batch_size(_op1).get_common_size()[1] == \ + ::gko::detail::get_batch_size(_op2).get_common_size()[0]; \ + if (!equal_num_items) { \ + throw ::gko::ValueMismatch( \ + __FILE__, __LINE__, __func__, \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + "expected equal number of batch items"); \ + } else if (!equal_inner_size) { \ + throw ::gko::DimensionMismatch( \ + __FILE__, __LINE__, __func__, #_op1, \ + ::gko::detail::get_batch_size(_op1).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op1).get_common_size()[1], \ + #_op2, \ + ::gko::detail::get_batch_size(_op2).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op2).get_common_size()[1], \ + "expected matching inner dimensions among all batch items"); \ + } \ + } + + +/** + * Asserts that _op1 can be applied to _op2 from the right. + * + * @throw DimensionMismatch if _op1 cannot be applied to _op2 from the right. + */ +#define GKO_ASSERT_BATCH_REVERSE_CONFORMANT(_op1, _op2) \ + { \ + auto equal_num_items = \ + ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ + auto equal_outer_size = \ + ::gko::detail::get_batch_size(_op1).get_common_size()[0] == \ + ::gko::detail::get_batch_size(_op2).get_common_size()[1]; \ + if (!equal_num_items) { \ + throw ::gko::ValueMismatch( \ + __FILE__, __LINE__, __func__, \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + "expected equal number of batch items"); \ + } else if (!equal_outer_size) { \ + throw ::gko::DimensionMismatch( \ + __FILE__, __LINE__, __func__, #_op1, \ + ::gko::detail::get_batch_size(_op1).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op1).get_common_size()[1], \ + #_op2, \ + ::gko::detail::get_batch_size(_op2).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op2).get_common_size()[1], \ + "expected matching outer dimensions among all batch items"); \ + } \ + } + + +/** + * Asserts that `_op1` and `_op2` have the same number of rows. 
+ * + * @throw DimensionMismatch if `_op1` and `_op2` differ in the number of rows + */ +#define GKO_ASSERT_BATCH_EQUAL_ROWS(_op1, _op2) \ + { \ + auto equal_num_items = \ + ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ + auto equal_rows = \ + ::gko::detail::get_batch_size(_op1).get_common_size()[0] == \ + ::gko::detail::get_batch_size(_op2).get_common_size()[0]; \ + if (!equal_num_items) { \ + throw ::gko::ValueMismatch( \ + __FILE__, __LINE__, __func__, \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + "expected equal number of batch items"); \ + } else if (!equal_rows) { \ + throw ::gko::DimensionMismatch( \ + __FILE__, __LINE__, __func__, #_op1, \ + ::gko::detail::get_batch_size(_op1).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op1).get_common_size()[1], \ + #_op2, \ + ::gko::detail::get_batch_size(_op2).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op2).get_common_size()[1], \ + "expected matching number of rows among all batch items"); \ + } \ + } + + +/** + * Asserts that `_op1` and `_op2` have the same number of columns. + * + * @throw DimensionMismatch if `_op1` and `_op2` differ in the number of + * columns + */ +#define GKO_ASSERT_BATCH_EQUAL_COLS(_op1, _op2) \ + { \ + auto equal_num_items = \ + ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ + auto equal_cols = \ + ::gko::detail::get_batch_size(_op1).get_common_size()[1] == \ + ::gko::detail::get_batch_size(_op2).get_common_size()[1]; \ + if (!equal_num_items) { \ + throw ::gko::ValueMismatch( \ + __FILE__, __LINE__, __func__, \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + "expected equal number of batch items"); \ + } else if (!equal_cols) { \ + throw ::gko::DimensionMismatch( \ + __FILE__, __LINE__, __func__, #_op1, \ + ::gko::detail::get_batch_size(_op1).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op1).get_common_size()[1], \ + #_op2, \ + ::gko::detail::get_batch_size(_op2).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op2).get_common_size()[1], \ + "expected matching number of cols among all batch items"); \ + } \ + } + + +/** + * Asserts that `_op1` and `_op2` have the same number of rows and columns. 
+ * + * @throw DimensionMismatch if `_op1` and `_op2` differ in the number of + * rows or columns + */ +#define GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(_op1, _op2) \ + { \ + auto equal_num_items = \ + ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ + auto equal_size = \ + ::gko::detail::get_batch_size(_op1).get_common_size() == \ + ::gko::detail::get_batch_size(_op2).get_common_size(); \ + if (!equal_num_items) { \ + throw ::gko::ValueMismatch( \ + __FILE__, __LINE__, __func__, \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + "expected equal number of batch items"); \ + } else if (!equal_size) { \ + throw ::gko::DimensionMismatch( \ + __FILE__, __LINE__, __func__, #_op1, \ + ::gko::detail::get_batch_size(_op1).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op1).get_common_size()[1], \ + #_op2, \ + ::gko::detail::get_batch_size(_op2).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op2).get_common_size()[1], \ + "expected matching size among all batch items"); \ + } \ + } + + /** * Instantiates a MpiError. * diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index b700e1e703a..a6fade087b0 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -58,6 +58,13 @@ class PolymorphicObject; class Operation; class stopping_status; + +namespace batch { +class BatchLinOp; +class BatchLinOpFactory; +} // namespace batch + + /** * @brief The Stopping criterion namespace. * @ref stop @@ -448,9 +455,9 @@ public: \ * @warning This on_iteration_complete function that this macro declares is * deprecated. Please use the version with the stopping information. */ - [[deprecated( - "Please use the version with the additional stopping " - "information.")]] virtual void + [ + [deprecated("Please use the version with the additional stopping " + "information.")]] virtual void on_iteration_complete(const LinOp* solver, const size_type& it, const LinOp* r, const LinOp* x = nullptr, const LinOp* tau = nullptr) const @@ -469,9 +476,9 @@ public: \ * @warning This on_iteration_complete function that this macro declares is * deprecated. Please use the version with the stopping information. */ - [[deprecated( - "Please use the version with the additional stopping " - "information.")]] virtual void + [ + [deprecated("Please use the version with the additional stopping " + "information.")]] virtual void on_iteration_complete(const LinOp* solver, const size_type& it, const LinOp* r, const LinOp* x, const LinOp* tau, const LinOp* implicit_tau_sq) const @@ -563,6 +570,86 @@ public: \ const PolymorphicObject* input, const PolymorphicObject* output) + /** + * BatchLinOp's apply started event. + * + * @param A the system matrix + * @param b the input vector(s) + * @param x the output vector(s) + */ + GKO_LOGGER_REGISTER_EVENT(24, batch_linop_apply_started, + const batch::BatchLinOp* A, + const batch::BatchLinOp* b, + const batch::BatchLinOp* x) + + /** + * BatchLinOp's apply completed event. + * + * @param A the system matrix + * @param b the input vector(s) + * @param x the output vector(s) + */ + GKO_LOGGER_REGISTER_EVENT(25, batch_linop_apply_completed, + const batch::BatchLinOp* A, + const batch::BatchLinOp* b, + const batch::BatchLinOp* x) + + /** + * BatchLinOp's advanced apply started event. 
+ * + * @param A the system matrix + * @param alpha scaling of the result of op(b) + * @param b the input vector(s) + * @param beta scaling of the input x + * @param x the output vector(s) + */ + GKO_LOGGER_REGISTER_EVENT(26, batch_linop_advanced_apply_started, + const batch::BatchLinOp* A, + const batch::BatchLinOp* alpha, + const batch::BatchLinOp* b, + const batch::BatchLinOp* beta, + const batch::BatchLinOp* x) + + /** + * BatchLinOp's advanced apply completed event. + * + * @param A the system matrix + * @param alpha scaling of the result of op(b) + * @param b the input vector(s) + * @param beta scaling of the input x + * @param x the output vector(s) + */ + GKO_LOGGER_REGISTER_EVENT(27, batch_linop_advanced_apply_completed, + const batch::BatchLinOp* A, + const batch::BatchLinOp* alpha, + const batch::BatchLinOp* b, + const batch::BatchLinOp* beta, + const batch::BatchLinOp* x) + + /** + * BatchLinOp Factory's generate started event. + * + * @param factory the factory used + * @param input the BatchLinOp object used as input for the generation + * (usually a system matrix) + */ + GKO_LOGGER_REGISTER_EVENT(28, batch_linop_factory_generate_started, + const batch::BatchLinOpFactory* factory, + const batch::BatchLinOp* input) + + /** + * BatchLinOp Factory's generate completed event. + * + * @param factory the factory used + * @param input the BatchLinOp object used as input for the generation + * (usually a system matrix) + * @param output the generated BatchLinOp object + */ + GKO_LOGGER_REGISTER_EVENT(29, batch_linop_factory_generate_completed, + const batch::BatchLinOpFactory* factory, + const batch::BatchLinOp* input, + const batch::BatchLinOp* output) + #undef GKO_LOGGER_REGISTER_EVENT /** @@ -605,6 +692,21 @@ public: \ linop_factory_generate_started_mask | linop_factory_generate_completed_mask; + /** + * Bitset Mask which activates all batch linop events + */ + static constexpr mask_type batch_linop_events_mask = + batch_linop_apply_started_mask | batch_linop_apply_completed_mask | + batch_linop_advanced_apply_started_mask | + batch_linop_advanced_apply_completed_mask; + + /** + * Bitset Mask which activates all batch linop factory events + */ + static constexpr mask_type batch_linop_factory_events_mask = + batch_linop_factory_generate_started_mask | + batch_linop_factory_generate_completed_mask; + /** * Bitset Mask which activates all criterion events */ @@ -772,8 +874,8 @@ class EnableLogging : public PolymorphicBase { template struct propagate_log_helper< Event, ConcreteLoggableT, - xstd::void_t< - decltype(std::declval().get_executor())>> { + xstd::void_t().get_executor())>> { template static void propagate_log(const ConcreteLoggableT* loggable, Args&&... args) diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index bcdaa5d2d20..186a5fce061 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -40,6 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include +#include #include #include #include @@ -54,6 +56,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include From 0da356008556c8afd3f20e20dc641d67a9ba9695 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 27 Sep 2023 11:22:07 +0200 Subject: [PATCH 330/583] Review updates. 
Co-authored-by: Marcel Koch --- core/test/base/batch_lin_op.cpp | 72 ++---------- include/ginkgo/core/base/batch_lin_op.hpp | 14 +-- .../ginkgo/core/base/exception_helpers.hpp | 108 +++++++----------- 3 files changed, 58 insertions(+), 136 deletions(-) diff --git a/core/test/base/batch_lin_op.cpp b/core/test/base/batch_lin_op.cpp index 1fe1765987f..b656c2bf3fb 100644 --- a/core/test/base/batch_lin_op.cpp +++ b/core/test/base/batch_lin_op.cpp @@ -57,23 +57,13 @@ class DummyBatchLinOp : public gko::batch::EnableBatchLinOp, : gko::batch::EnableBatchLinOp(exec, size) {} - void access() const { last_access = this->get_executor(); } - - mutable std::shared_ptr last_access; - mutable std::shared_ptr last_b_access; - mutable std::shared_ptr last_x_access; - mutable std::shared_ptr last_alpha_access; - mutable std::shared_ptr last_beta_access; + int called = 0; protected: void apply_impl(const gko::batch::BatchLinOp* b, gko::batch::BatchLinOp* x) const override { - this->access(); - static_cast(b)->access(); - static_cast(x)->access(); - last_b_access = b->get_executor(); - last_x_access = x->get_executor(); + this->called = 1; } void apply_impl(const gko::batch::BatchLinOp* alpha, @@ -81,15 +71,7 @@ class DummyBatchLinOp : public gko::batch::EnableBatchLinOp, const gko::batch::BatchLinOp* beta, gko::batch::BatchLinOp* x) const override { - this->access(); - static_cast(alpha)->access(); - static_cast(b)->access(); - static_cast(beta)->access(); - static_cast(x)->access(); - last_alpha_access = alpha->get_executor(); - last_b_access = b->get_executor(); - last_beta_access = beta->get_executor(); - last_x_access = x->get_executor(); + this->called = 2; } }; @@ -156,7 +138,7 @@ TEST_F(EnableBatchLinOp, CallsApplyImpl) { op->apply(b, x); - ASSERT_EQ(op->last_access, ref2); + ASSERT_EQ(op->called, 1); } @@ -164,7 +146,7 @@ TEST_F(EnableBatchLinOp, CallsApplyImplForBatch) { op2->apply(b2, x2); - ASSERT_EQ(op2->last_access, ref2); + ASSERT_EQ(op2->called, 1); } @@ -172,7 +154,7 @@ TEST_F(EnableBatchLinOp, CallsExtendedApplyImpl) { op->apply(alpha, b, beta, x); - ASSERT_EQ(op->last_access, ref2); + ASSERT_EQ(op->called, 2); } @@ -180,7 +162,7 @@ TEST_F(EnableBatchLinOp, CallsExtendedApplyImplBatch) { op2->apply(alpha2, b2, beta2, x2); - ASSERT_EQ(op2->last_access, ref2); + ASSERT_EQ(op2->called, 2); } @@ -283,46 +265,6 @@ TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongBetaDimension) } -TEST_F(EnableBatchLinOp, ApplyDoesNotCopyBetweenSameMemory) -{ - op->apply(b, x); - - ASSERT_EQ(op->last_b_access, ref); - ASSERT_EQ(op->last_x_access, ref); -} - - -TEST_F(EnableBatchLinOp, ApplyNoCopyBackBetweenSameMemory) -{ - op->apply(b, x); - - ASSERT_EQ(b->last_access, ref); - ASSERT_EQ(x->last_access, ref); -} - - -TEST_F(EnableBatchLinOp, ExtendedApplyDoesNotCopyBetweenSameMemory) -{ - op->apply(alpha, b, beta, x); - - ASSERT_EQ(op->last_alpha_access, ref); - ASSERT_EQ(op->last_b_access, ref); - ASSERT_EQ(op->last_beta_access, ref); - ASSERT_EQ(op->last_x_access, ref); -} - - -TEST_F(EnableBatchLinOp, ExtendedApplyNoCopyBackBetweenSameMemory) -{ - op->apply(alpha, b, beta, x); - - ASSERT_EQ(alpha->last_access, ref); - ASSERT_EQ(b->last_access, ref); - ASSERT_EQ(beta->last_access, ref); - ASSERT_EQ(x->last_access, ref); -} - - template class DummyBatchLinOpWithFactory : public gko::batch::EnableBatchLinOp> { diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index 68a88027904..a04ae3e79ce 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ 
b/include/ginkgo/core/base/batch_lin_op.hpp @@ -194,13 +194,6 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { return size_.get_num_batch_items(); } - /** - * Sets the size of the batch operator. - * - * @param size to be set - */ - void set_size(const batch_dim<2>& size) { size_ = size; } - /** * Returns the size of the batch operator. * @@ -209,6 +202,13 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { const batch_dim<2>& get_size() const noexcept { return size_; } protected: + /** + * Sets the size of the batch operator. + * + * @param size to be set + */ + void set_size(const batch_dim<2>& size) { size_ = size; } + /** * Creates a batch operator with uniform batches. * diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index a45e19612b2..cb5a8b10263 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -315,6 +315,26 @@ inline size_type get_num_batch_items(const T& obj) } +/** + * Asserts that _op1 and _op2 have equal number of items in the batch + * + * @throw ValueMismatch if _op1 and _op2 do not have equal number of items + */ +#define GKO_ASSERT_BATCH_EQUAL_NUM_ITEMS(_op1, _op2) \ + { \ + auto equal_num_items = \ + ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ + if (!equal_num_items) { \ + throw ::gko::ValueMismatch( \ + __FILE__, __LINE__, __func__, \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ + "expected equal number of batch items"); \ + } \ + } + + /** * Asserts that _op1 can be applied to _op2. * @@ -322,19 +342,11 @@ inline size_type get_num_batch_items(const T& obj) */ #define GKO_ASSERT_BATCH_CONFORMANT(_op1, _op2) \ { \ - auto equal_num_items = \ - ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ + GKO_ASSERT_BATCH_EQUAL_NUM_ITEMS(_op1, _op2); \ auto equal_inner_size = \ ::gko::detail::get_batch_size(_op1).get_common_size()[1] == \ ::gko::detail::get_batch_size(_op2).get_common_size()[0]; \ - if (!equal_num_items) { \ - throw ::gko::ValueMismatch( \ - __FILE__, __LINE__, __func__, \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ - "expected equal number of batch items"); \ - } else if (!equal_inner_size) { \ + if (!equal_inner_size) { \ throw ::gko::DimensionMismatch( \ __FILE__, __LINE__, __func__, #_op1, \ ::gko::detail::get_batch_size(_op1).get_common_size()[0], \ @@ -354,19 +366,11 @@ inline size_type get_num_batch_items(const T& obj) */ #define GKO_ASSERT_BATCH_REVERSE_CONFORMANT(_op1, _op2) \ { \ - auto equal_num_items = \ - ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ + GKO_ASSERT_BATCH_EQUAL_NUM_ITEMS(_op1, _op2); \ auto equal_outer_size = \ ::gko::detail::get_batch_size(_op1).get_common_size()[0] == \ ::gko::detail::get_batch_size(_op2).get_common_size()[1]; \ - if (!equal_num_items) { \ - throw ::gko::ValueMismatch( \ - __FILE__, __LINE__, __func__, \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ - "expected equal number of batch items"); \ - } else if (!equal_outer_size) { \ + if (!equal_outer_size) { \ throw ::gko::DimensionMismatch( \ __FILE__, 
__LINE__, __func__, #_op1, \ ::gko::detail::get_batch_size(_op1).get_common_size()[0], \ @@ -386,19 +390,11 @@ inline size_type get_num_batch_items(const T& obj) */ #define GKO_ASSERT_BATCH_EQUAL_ROWS(_op1, _op2) \ { \ - auto equal_num_items = \ - ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ + GKO_ASSERT_BATCH_EQUAL_NUM_ITEMS(_op1, _op2); \ auto equal_rows = \ ::gko::detail::get_batch_size(_op1).get_common_size()[0] == \ ::gko::detail::get_batch_size(_op2).get_common_size()[0]; \ - if (!equal_num_items) { \ - throw ::gko::ValueMismatch( \ - __FILE__, __LINE__, __func__, \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ - "expected equal number of batch items"); \ - } else if (!equal_rows) { \ + if (!equal_rows) { \ throw ::gko::DimensionMismatch( \ __FILE__, __LINE__, __func__, #_op1, \ ::gko::detail::get_batch_size(_op1).get_common_size()[0], \ @@ -419,19 +415,11 @@ inline size_type get_num_batch_items(const T& obj) */ #define GKO_ASSERT_BATCH_EQUAL_COLS(_op1, _op2) \ { \ - auto equal_num_items = \ - ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ + GKO_ASSERT_BATCH_EQUAL_NUM_ITEMS(_op1, _op2); \ auto equal_cols = \ ::gko::detail::get_batch_size(_op1).get_common_size()[1] == \ ::gko::detail::get_batch_size(_op2).get_common_size()[1]; \ - if (!equal_num_items) { \ - throw ::gko::ValueMismatch( \ - __FILE__, __LINE__, __func__, \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ - "expected equal number of batch items"); \ - } else if (!equal_cols) { \ + if (!equal_cols) { \ throw ::gko::DimensionMismatch( \ __FILE__, __LINE__, __func__, #_op1, \ ::gko::detail::get_batch_size(_op1).get_common_size()[0], \ @@ -450,30 +438,22 @@ inline size_type get_num_batch_items(const T& obj) * @throw DimensionMismatch if `_op1` and `_op2` differ in the number of * rows or columns */ -#define GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(_op1, _op2) \ - { \ - auto equal_num_items = \ - ::gko::detail::get_batch_size(_op1).get_num_batch_items() == \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(); \ - auto equal_size = \ - ::gko::detail::get_batch_size(_op1).get_common_size() == \ - ::gko::detail::get_batch_size(_op2).get_common_size(); \ - if (!equal_num_items) { \ - throw ::gko::ValueMismatch( \ - __FILE__, __LINE__, __func__, \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ - ::gko::detail::get_batch_size(_op2).get_num_batch_items(), \ - "expected equal number of batch items"); \ - } else if (!equal_size) { \ - throw ::gko::DimensionMismatch( \ - __FILE__, __LINE__, __func__, #_op1, \ - ::gko::detail::get_batch_size(_op1).get_common_size()[0], \ - ::gko::detail::get_batch_size(_op1).get_common_size()[1], \ - #_op2, \ - ::gko::detail::get_batch_size(_op2).get_common_size()[0], \ - ::gko::detail::get_batch_size(_op2).get_common_size()[1], \ - "expected matching size among all batch items"); \ - } \ +#define GKO_ASSERT_BATCH_EQUAL_DIMENSIONS(_op1, _op2) \ + { \ + GKO_ASSERT_BATCH_EQUAL_NUM_ITEMS(_op1, _op2); \ + auto equal_size = \ + ::gko::detail::get_batch_size(_op1).get_common_size() == \ + ::gko::detail::get_batch_size(_op2).get_common_size(); \ + if (!equal_size) { \ + throw ::gko::DimensionMismatch( \ + __FILE__, __LINE__, __func__, #_op1, \ + 
::gko::detail::get_batch_size(_op1).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op1).get_common_size()[1], \ + #_op2, \ + ::gko::detail::get_batch_size(_op2).get_common_size()[0], \ + ::gko::detail::get_batch_size(_op2).get_common_size()[1], \ + "expected matching size among all batch items"); \ + } \ } From c339832ffb4ef1170879fb80648e355e839871e1 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 1 Oct 2023 14:50:07 +0200 Subject: [PATCH 331/583] Remove apply functionality from BatchLinOp --- core/test/base/batch_lin_op.cpp | 185 +--------------------- include/ginkgo/core/base/batch_lin_op.hpp | 185 ---------------------- 2 files changed, 1 insertion(+), 369 deletions(-) diff --git a/core/test/base/batch_lin_op.cpp b/core/test/base/batch_lin_op.cpp index b656c2bf3fb..2e0bf0fae0e 100644 --- a/core/test/base/batch_lin_op.cpp +++ b/core/test/base/batch_lin_op.cpp @@ -56,23 +56,6 @@ class DummyBatchLinOp : public gko::batch::EnableBatchLinOp, gko::batch_dim<2> size = gko::batch_dim<2>{}) : gko::batch::EnableBatchLinOp(exec, size) {} - - int called = 0; - -protected: - void apply_impl(const gko::batch::BatchLinOp* b, - gko::batch::BatchLinOp* x) const override - { - this->called = 1; - } - - void apply_impl(const gko::batch::BatchLinOp* alpha, - const gko::batch::BatchLinOp* b, - const gko::batch::BatchLinOp* beta, - gko::batch::BatchLinOp* x) const override - { - this->called = 2; - } }; @@ -84,37 +67,13 @@ class EnableBatchLinOp : public ::testing::Test { op{DummyBatchLinOp::create(ref2, gko::batch_dim<2>(1, gko::dim<2>{3, 5}))}, op2{DummyBatchLinOp::create(ref2, - gko::batch_dim<2>(2, gko::dim<2>{3, 5}))}, - alpha{DummyBatchLinOp::create( - ref, gko::batch_dim<2>(1, gko::dim<2>{1, 1}))}, - alpha2{DummyBatchLinOp::create( - ref, gko::batch_dim<2>(2, gko::dim<2>{1, 1}))}, - beta{DummyBatchLinOp::create( - ref, gko::batch_dim<2>(1, gko::dim<2>{1, 1}))}, - beta2{DummyBatchLinOp::create( - ref, gko::batch_dim<2>(2, gko::dim<2>{1, 1}))}, - b{DummyBatchLinOp::create(ref, - gko::batch_dim<2>(1, gko::dim<2>{5, 4}))}, - b2{DummyBatchLinOp::create(ref, - gko::batch_dim<2>(2, gko::dim<2>{5, 4}))}, - x{DummyBatchLinOp::create(ref, - gko::batch_dim<2>(1, gko::dim<2>{3, 4}))}, - x2{DummyBatchLinOp::create(ref, - gko::batch_dim<2>(2, gko::dim<2>{3, 4}))} + gko::batch_dim<2>(2, gko::dim<2>{3, 5}))} {} std::shared_ptr ref; std::shared_ptr ref2; std::unique_ptr op; std::unique_ptr op2; - std::unique_ptr alpha; - std::unique_ptr alpha2; - std::unique_ptr beta; - std::unique_ptr beta2; - std::unique_ptr b; - std::unique_ptr b2; - std::unique_ptr x; - std::unique_ptr x2; }; @@ -134,137 +93,6 @@ TEST_F(EnableBatchLinOp, KnowsItsSizes) } -TEST_F(EnableBatchLinOp, CallsApplyImpl) -{ - op->apply(b, x); - - ASSERT_EQ(op->called, 1); -} - - -TEST_F(EnableBatchLinOp, CallsApplyImplForBatch) -{ - op2->apply(b2, x2); - - ASSERT_EQ(op2->called, 1); -} - - -TEST_F(EnableBatchLinOp, CallsExtendedApplyImpl) -{ - op->apply(alpha, b, beta, x); - - ASSERT_EQ(op->called, 2); -} - - -TEST_F(EnableBatchLinOp, CallsExtendedApplyImplBatch) -{ - op2->apply(alpha2, b2, beta2, x2); - - ASSERT_EQ(op2->called, 2); -} - - -TEST_F(EnableBatchLinOp, ApplyFailsOnWrongBatchSize) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 4})); - - ASSERT_THROW(op->apply(wrong, x), gko::DimensionMismatch); -} - - -TEST_F(EnableBatchLinOp, ApplyFailsOnWrongNumBatchItems) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 4})); - - ASSERT_THROW(op2->apply(wrong, x2), 
gko::ValueMismatch); -} - - -TEST_F(EnableBatchLinOp, ApplyFailsOnWrongSolutionRows) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{5, 4})); - - ASSERT_THROW(op->apply(b, wrong), gko::DimensionMismatch); -} - - -TEST_F(EnableBatchLinOp, ApplyFailsOnOneBatchItemWrongSolutionRows) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(2, gko::dim<2>{5, 4})); - - ASSERT_THROW(op2->apply(b2, wrong), gko::DimensionMismatch); -} - - -TEST_F(EnableBatchLinOp, ApplyFailsOnWrongSolutionColumns) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 5})); - - ASSERT_THROW(op->apply(b, wrong), gko::DimensionMismatch); -} - - -TEST_F(EnableBatchLinOp, ApplyFailsOnOneBatchItemWrongSolutionColumn) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(2, gko::dim<2>{3, 5})); - - ASSERT_THROW(op2->apply(b2, wrong), gko::DimensionMismatch); -} - - -TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongBatchSize) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 4})); - - ASSERT_THROW(op->apply(alpha, wrong, beta, x), gko::DimensionMismatch); -} - - -TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongSolutionRows) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{5, 4})); - - ASSERT_THROW(op->apply(alpha, b, beta, wrong), gko::DimensionMismatch); -} - - -TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongSolutionColumns) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 5})); - - ASSERT_THROW(op->apply(alpha, b, beta, wrong), gko::DimensionMismatch); -} - - -TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongAlphaDimension) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{2, 5})); - - ASSERT_THROW(op->apply(wrong, b, beta, x), gko::DimensionMismatch); -} - - -TEST_F(EnableBatchLinOp, ExtendedApplyFailsOnWrongBetaDimension) -{ - auto wrong = - DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{2, 5})); - - ASSERT_THROW(op->apply(alpha, b, wrong, x), gko::DimensionMismatch); -} - - template class DummyBatchLinOpWithFactory : public gko::batch::EnableBatchLinOp> { @@ -290,17 +118,6 @@ class DummyBatchLinOpWithFactory {} std::shared_ptr op_; - -protected: - void apply_impl(const gko::batch::BatchLinOp* b, - gko::batch::BatchLinOp* x) const override - {} - - void apply_impl(const gko::batch::BatchLinOp* alpha, - const gko::batch::BatchLinOp* b, - const gko::batch::BatchLinOp* beta, - gko::batch::BatchLinOp* x) const override - {} }; diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index a04ae3e79ce..ac632c715e8 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -91,99 +91,6 @@ namespace batch { */ class BatchLinOp : public EnableAbstractPolymorphicObject { public: - /** - * Applies a batch linear operator to a batch vector (or a sequence of batch - * of vectors). - * - * Performs the operation x = op(b), where op is this batch linear operator. 
- * - * @param b the input batch vector(s) on which the batch operator is - * applied - * @param x the output batch vector(s) where the result is stored - * - * @return this - */ - BatchLinOp* apply(ptr_param b, ptr_param x) - { - this->template log( - this, b.get(), x.get()); - this->validate_application_parameters(b.get(), x.get()); - auto exec = this->get_executor(); - this->apply_impl(make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, x).get()); - this->template log( - this, b.get(), x.get()); - return this; - } - - /** - * @copydoc apply(const BatchLinOp *, BatchLinOp *) - */ - const BatchLinOp* apply(ptr_param b, - ptr_param x) const - { - this->template log( - this, b.get(), x.get()); - this->validate_application_parameters(b.get(), x.get()); - auto exec = this->get_executor(); - this->apply_impl(make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, x).get()); - this->template log( - this, b.get(), x.get()); - return this; - } - - /** - * Performs the operation x = alpha * op(b) + beta * x. - * - * @param alpha scaling of the result of op(b) - * @param b vector(s) on which the operator is applied - * @param beta scaling of the input x - * @param x output vector(s) - * - * @return this - */ - BatchLinOp* apply(ptr_param alpha, - ptr_param b, - ptr_param beta, ptr_param x) - { - this->template log( - this, alpha.get(), b.get(), beta.get(), x.get()); - this->validate_application_parameters(alpha.get(), b.get(), beta.get(), - x.get()); - auto exec = this->get_executor(); - this->apply_impl(make_temporary_clone(exec, alpha).get(), - make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, beta).get(), - make_temporary_clone(exec, x).get()); - this->template log( - this, alpha.get(), b.get(), beta.get(), x.get()); - return this; - } - - /** - * @copydoc apply(const BatchLinOp *, const BatchLinOp *, const BatchLinOp - * *, BatchLinOp *) - */ - const BatchLinOp* apply(ptr_param alpha, - ptr_param b, - ptr_param beta, - ptr_param x) const - { - this->template log( - this, alpha.get(), b.get(), beta.get(), x.get()); - this->validate_application_parameters(alpha.get(), b.get(), beta.get(), - x.get()); - auto exec = this->get_executor(); - this->apply_impl(make_temporary_clone(exec, alpha).get(), - make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, beta).get(), - make_temporary_clone(exec, x).get()); - this->template log( - this, alpha.get(), b.get(), beta.get(), x.get()); - return this; - } - /** * Returns the number of batches in the batch operator. * @@ -236,66 +143,6 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { : EnableAbstractPolymorphicObject(exec), size_{batch_size} {} - /** - * Implementers of BatchLinOp should override this function instead - * of apply(const BatchLinOp *, BatchLinOp *). - * - * Performs the operation x = op(b), where op is this linear operator. - * - * @param b the input batch vector(s) on which the operator is applied - * @param x the output batch vector(s) where the result is stored - */ - virtual void apply_impl(const BatchLinOp* b, BatchLinOp* x) const = 0; - - /** - * Implementers of BatchLinOp should override this function instead - * of apply(const BatchLinOp *, const BatchLinOp *, const BatchLinOp *, - * BatchLinOp *). 
- * - * @param alpha scaling of the result of op(b) - * @param b vector(s) on which the operator is applied - * @param beta scaling of the input x - * @param x output vector(s) - */ - virtual void apply_impl(const BatchLinOp* alpha, const BatchLinOp* b, - const BatchLinOp* beta, BatchLinOp* x) const = 0; - - /** - * Throws a DimensionMismatch exception if the parameters to `apply` are of - * the wrong size. - * - * @param b batch vector(s) on which the operator is applied - * @param x output batch vector(s) - */ - void validate_application_parameters(const BatchLinOp* b, - const BatchLinOp* x) const - { - GKO_ASSERT_BATCH_CONFORMANT(this, b); - GKO_ASSERT_BATCH_EQUAL_ROWS(this, x); - GKO_ASSERT_BATCH_EQUAL_COLS(b, x); - } - - /** - * Throws a DimensionMismatch exception if the parameters to `apply` are of - * the wrong size. - * - * @param alpha scaling of the result of op(b) - * @param b batch vector(s) on which the operator is applied - * @param beta scaling of the input x - * @param x output batch vector(s) - */ - void validate_application_parameters(const BatchLinOp* alpha, - const BatchLinOp* b, - const BatchLinOp* beta, - const BatchLinOp* x) const - { - this->validate_application_parameters(b, x); - GKO_ASSERT_BATCH_EQUAL_ROWS( - alpha, batch_dim<2>(b->get_num_batch_items(), dim<2>(1, 1))); - GKO_ASSERT_BATCH_EQUAL_ROWS( - beta, batch_dim<2>(b->get_num_batch_items(), dim<2>(1, 1))); - } - private: batch_dim<2> size_{}; }; @@ -395,38 +242,6 @@ class EnableBatchLinOp using EnablePolymorphicObject::EnablePolymorphicObject; - const ConcreteBatchLinOp* apply(ptr_param b, - ptr_param x) const - { - PolymorphicBase::apply(b, x); - return self(); - } - - ConcreteBatchLinOp* apply(ptr_param b, - ptr_param x) - { - PolymorphicBase::apply(b, x); - return self(); - } - - const ConcreteBatchLinOp* apply(ptr_param alpha, - ptr_param b, - ptr_param beta, - ptr_param x) const - { - PolymorphicBase::apply(alpha, b, beta, x); - return self(); - } - - ConcreteBatchLinOp* apply(ptr_param alpha, - ptr_param b, - ptr_param beta, - ptr_param x) - { - PolymorphicBase::apply(alpha, b, beta, x); - return self(); - } - protected: GKO_ENABLE_SELF(ConcreteBatchLinOp); }; From e1be54f118120313a5597b85047792edf94b9fa3 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 09:38:11 +0200 Subject: [PATCH 332/583] Update docs --- include/ginkgo/core/base/batch_lin_op.hpp | 45 +++++++------------ .../ginkgo/core/base/batch_lin_op_helpers.hpp | 6 +-- 2 files changed, 19 insertions(+), 32 deletions(-) diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index ac632c715e8..54a1ead1a3d 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -70,31 +70,22 @@ namespace batch { * have no communication/information exchange between them. Therefore, any * collective operations between the batches is not possible and not * implemented. This allows for each batch to be computed and operated on in an - * embarrasingly parallel fashion. + * embarrassingly parallel fashion. * - * Similar to the LinOp class, the BatchLinOp also implements - * BatchLinOp::apply() methods which call the internal apply_impl() methods - * which the concrete BatchLinOp's have to implement. - * - * A key difference between the LinOp and the BatchLinOp classes is the storing - * of dimensions. BatchLinOp allows for storing non-equal objects in the - * batches and hence stores a batch_dim object instead of a dim object. 
The - * batch_dim object is optimized to store less amount of data when storing - * uniform batches. - * - * All size validation functions again verify first that the number of batches - * are conformant and that the dimensions in the corresponding batches - * themselves are also valid/conformant. Here too, optimizations for uniform - * batches have been added. + * A key difference between the LinOp and the BatchLinOp class is that the apply + * between BatchLinOps is no longer supported. The user can apply a BatchLinOp + * to a batch::MultiVector but not to any general BatchLinOp. Therefore, the + * BatchLinOp serves only as a base class providing necessary core functionality + * from Polymorphic object and store the dimensions of the batched object. * * @ref BatchLinOp */ class BatchLinOp : public EnableAbstractPolymorphicObject { public: /** - * Returns the number of batches in the batch operator. + * Returns the number of items in the batch operator. * - * @return number of batches in the batch operator + * @return number of items in the batch operator */ size_type get_num_batch_items() const noexcept { @@ -104,7 +95,7 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { /** * Returns the size of the batch operator. * - * @return size of the batch operator + * @return size of the batch operator, a batch_dim object */ const batch_dim<2>& get_size() const noexcept { return size_; } @@ -117,12 +108,12 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { void set_size(const batch_dim<2>& size) { size_ = size; } /** - * Creates a batch operator with uniform batches. + * Creates a batch operator storing items of uniform sizes. * * @param exec the executor where all the operations are performed - * @param num_batch_items the number of batches to be stored in the + * @param num_batch_items the number of batch items to be stored in the * operator - * @param size the size of on of the operator in the batched operator + * @param size the common size of the items in the batched operator */ explicit BatchLinOp(std::shared_ptr exec, const size_type num_batch_items = 0, @@ -133,10 +124,10 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { {} /** - * Creates a batch operator. + * Creates a batch operator storing items of uniform sizes. * - * @param exec the executor where all the operations are performed - * @param batch_size the sizes of the batch operator stored as a batch_dim + * @param exec the executor where all the operations are performed + * @param batch_size the size the batched operator, as a batch_dim object */ explicit BatchLinOp(std::shared_ptr exec, const batch_dim<2>& batch_size) @@ -213,9 +204,7 @@ class BatchLinOpFactory * while the library takes care of generating the trivial utility functions. * The mixin will provide default implementations for the entire * PolymorphicObject interface, including a default implementation of - * `copy_from` between objects of the new BatchLinOp type. It will also hide the - * default BatchLinOp::apply() methods with versions that preserve the static - * type of the object. + * `copy_from` between objects of the new BatchLinOp type. * * Implementers of new BatchLinOps are required to specify only the following * aspects: @@ -224,8 +213,6 @@ class BatchLinOpFactory * EnableCreateMethod mixin (used mostly for matrix formats), * or GKO_ENABLE_BATCH_LIN_OP_FACTORY macro (used for operators created from * other operators, like preconditioners and solvers). - * 2. 
Application of the BatchLinOp: Implementers have to override the two - * overloads of the BatchLinOp::apply_impl() virtual methods. * * @tparam ConcreteBatchLinOp the concrete BatchLinOp which is being * implemented [CRTP parameter] diff --git a/include/ginkgo/core/base/batch_lin_op_helpers.hpp b/include/ginkgo/core/base/batch_lin_op_helpers.hpp index 579411e9af0..7b479192a6b 100644 --- a/include/ginkgo/core/base/batch_lin_op_helpers.hpp +++ b/include/ginkgo/core/base/batch_lin_op_helpers.hpp @@ -61,7 +61,7 @@ namespace batch { * template parameters to enable a subclass of BatchLinOpFactory. * * @tparam ConcreteFactory the concrete factory which is being implemented - * [CRTP parmeter] + * [CRTP parameter] * @tparam ConcreteLinOp the concrete BatchLinOp type which this factory * produces, needs to have a constructor which takes a const ConcreteFactory *, * and an std::shared_ptr as parameters. @@ -128,13 +128,13 @@ using EnableDefaultBatchLinOpFactory = * // create a factory with default `my_value` parameter * auto fact = MyBatchLinOp::build().on(exec); * // create a operator using the factory: - * auto my_op = fact->generate(gko::matrix::BatchIdentity::create(exec, 2)); + * auto my_op = fact->generate(gko::batch::matrix::Identity::create(exec, 2)); * std::cout << my_op->get_my_parameters().my_value; // prints 5 * * // create a factory with custom `my_value` parameter * auto fact = MyLinOp::build().with_my_value(0).on(exec); * // create a operator using the factory: - * auto my_op = fact->generate(gko::matrix::BatchIdentity::create(exec, 2)); + * auto my_op = fact->generate(gko::batch::matrix::Identity::create(exec, 2)); * std::cout << my_op->get_my_parameters().my_value; // prints 0 * ``` * From 18d597ea3f969cd2cc7ef4711058a24a65620594 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 09:43:12 +0200 Subject: [PATCH 333/583] Remove BatchLinOp apply log events --- include/ginkgo/core/log/logger.hpp | 68 +----------------------------- 1 file changed, 2 insertions(+), 66 deletions(-) diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index a6fade087b0..a1607723a75 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -570,62 +570,6 @@ public: \ const PolymorphicObject* input, const PolymorphicObject* output) - /** - * BatchLinOp's apply started event. - * - * @param A the system matrix - * @param b the input vector(s) - * @param x the output vector(s) - */ - GKO_LOGGER_REGISTER_EVENT(24, batch_linop_apply_started, - const batch::BatchLinOp* A, - const batch::BatchLinOp* b, - const batch::BatchLinOp* x) - - /** - * BatchLinOp's apply completed event. - * - * @param A the system matrix - * @param b the input vector(s) - * @param x the output vector(s) - */ - GKO_LOGGER_REGISTER_EVENT(25, batch_linop_apply_completed, - const batch::BatchLinOp* A, - const batch::BatchLinOp* b, - const batch::BatchLinOp* x) - - /** - * BatchLinOp's advanced apply started event. - * - * @param A the system matrix - * @param alpha scaling of the result of op(b) - * @param b the input vector(s) - * @param beta scaling of the input x - * @param x the output vector(s) - */ - GKO_LOGGER_REGISTER_EVENT(26, batch_linop_advanced_apply_started, - const batch::BatchLinOp* A, - const batch::BatchLinOp* alpha, - const batch::BatchLinOp* b, - const batch::BatchLinOp* beta, - const batch::BatchLinOp* x) - - /** - * BatchLinOp's advanced apply completed event. 
- * - * @param A the system matrix - * @param alpha scaling of the result of op(b) - * @param b the input vector(s) - * @param beta scaling of the input x - * @param x the output vector(s) - */ - GKO_LOGGER_REGISTER_EVENT(27, batch_linop_advanced_apply_completed, - const batch::BatchLinOp* A, - const batch::BatchLinOp* alpha, - const batch::BatchLinOp* b, - const batch::BatchLinOp* beta, - const batch::BatchLinOp* x) - /** * BatchLinOp Factory's generate started event. * @@ -633,7 +577,7 @@ public: \ * @param input the BatchLinOp object used as input for the generation * (usually a system matrix) */ - GKO_LOGGER_REGISTER_EVENT(28, batch_linop_factory_generate_started, + GKO_LOGGER_REGISTER_EVENT(24, batch_linop_factory_generate_started, const batch::BatchLinOpFactory* factory, const batch::BatchLinOp* input) @@ -645,7 +589,7 @@ public: \ * (usually a system matrix) * @param output the generated BatchLinOp object */ - GKO_LOGGER_REGISTER_EVENT(29, batch_linop_factory_generate_completed, + GKO_LOGGER_REGISTER_EVENT(25, batch_linop_factory_generate_completed, const batch::BatchLinOpFactory* factory, const batch::BatchLinOp* input, const batch::BatchLinOp* output) @@ -692,14 +636,6 @@ public: \ linop_factory_generate_started_mask | linop_factory_generate_completed_mask; - /** - * Bitset Mask which activates all batch linop events - */ - static constexpr mask_type batch_linop_events_mask = - batch_linop_apply_started_mask | batch_linop_apply_completed_mask | - batch_linop_advanced_apply_started_mask | - batch_linop_advanced_apply_completed_mask; - /** * Bitset Mask which activates all batch linop factory events */ From 8913fc1aad786ec736b282d834306a77c502299a Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Fri, 6 Oct 2023 07:48:31 +0000 Subject: [PATCH 334/583] Format files Co-authored-by: Pratik Nayak --- include/ginkgo/core/log/logger.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index a1607723a75..a10782c0102 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -455,9 +455,9 @@ public: \ * @warning This on_iteration_complete function that this macro declares is * deprecated. Please use the version with the stopping information. */ - [ - [deprecated("Please use the version with the additional stopping " - "information.")]] virtual void + [[deprecated( + "Please use the version with the additional stopping " + "information.")]] virtual void on_iteration_complete(const LinOp* solver, const size_type& it, const LinOp* r, const LinOp* x = nullptr, const LinOp* tau = nullptr) const @@ -476,9 +476,9 @@ public: \ * @warning This on_iteration_complete function that this macro declares is * deprecated. Please use the version with the stopping information. */ - [ - [deprecated("Please use the version with the additional stopping " - "information.")]] virtual void + [[deprecated( + "Please use the version with the additional stopping " + "information.")]] virtual void on_iteration_complete(const LinOp* solver, const size_type& it, const LinOp* r, const LinOp* x, const LinOp* tau, const LinOp* implicit_tau_sq) const @@ -810,8 +810,8 @@ class EnableLogging : public PolymorphicBase { template struct propagate_log_helper< Event, ConcreteLoggableT, - xstd::void_t().get_executor())>> { + xstd::void_t< + decltype(std::declval().get_executor())>> { template static void propagate_log(const ConcreteLoggableT* loggable, Args&&... 
args) From f5d5e2857406c7a28bc4aa2352fe82ff295fe175 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 09:47:15 +0200 Subject: [PATCH 335/583] add common size getter --- include/ginkgo/core/base/batch_lin_op.hpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index 54a1ead1a3d..dd33e63bbd1 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -92,6 +92,13 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { return size_.get_num_batch_items(); } + /** + * Returns the common size of the batch items. + * + * @return the common size stored + */ + dim<2> get_common_size() const { return size_.get_common_size(); } + /** * Returns the size of the batch operator. * From 143aabebb4e2226355ab707fb7b13fef73344e6c Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 9 Oct 2023 23:46:13 +0200 Subject: [PATCH 336/583] Move lin_op_helpers back to lin_op --- include/ginkgo/core/base/batch_lin_op.hpp | 140 +++++++++++- .../ginkgo/core/base/batch_lin_op_helpers.hpp | 202 ------------------ include/ginkgo/ginkgo.hpp | 2 - 3 files changed, 139 insertions(+), 205 deletions(-) delete mode 100644 include/ginkgo/core/base/batch_lin_op_helpers.hpp diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index dd33e63bbd1..31984997b2c 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -40,7 +40,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include #include #include @@ -241,6 +240,145 @@ class EnableBatchLinOp }; +/** + * This is an alias for the EnableDefaultFactory mixin, which correctly sets the + * template parameters to enable a subclass of BatchLinOpFactory. + * + * @tparam ConcreteFactory the concrete factory which is being implemented + * [CRTP parameter] + * @tparam ConcreteLinOp the concrete BatchLinOp type which this factory + * produces, needs to have a constructor which takes a const ConcreteFactory *, + * and an std::shared_ptr as parameters. + * @tparam ParametersType a subclass of enable_parameters_type template which + * defines all of the parameters of the factory + * @tparam PolymorphicBase parent of ConcreteFactory in the polymorphic + * hierarchy, has to be a subclass of LinOpFactory + * + * @ingroup BatchLinOp + */ +template +using EnableDefaultBatchLinOpFactory = + EnableDefaultFactory; + + +/** + * This macro will generate a default implementation of a BatchLinOpFactory for + * the BatchLinOp subclass it is defined in. + * + * It is required to first call the macro #GKO_CREATE_FACTORY_PARAMETERS() + * before this one in order to instantiate the parameters type first. + * + * The list of parameters for the factory should be defined in a code block + * after the macro definition, and should contain a list of + * GKO_FACTORY_PARAMETER_* declarations. The class should provide a constructor + * with signature + * _batch_lin_op(const _factory_name *, std::shared_ptr) + * which the factory will use a callback to construct the object. 
+ * + * A minimal example of a batch linear operator is the following: + * + * ```c++ + * struct MyBatchLinOp : public EnableBatchLinOp { + * GKO_ENABLE_BATCH_LIN_OP_FACTORY(MyBatchLinOp, my_parameters, Factory) { + * // a factory parameter named "my_value", of type int and default + * // value of 5 + * int GKO_FACTORY_PARAMETER_SCALAR(my_value, 5); + * // a factory parameter named `my_pair` of type `std::pair` + * // and default value {5, 5} + * std::pair GKO_FACTORY_PARAMETER_VECTOR(my_pair, 5, 5); + * }; + * // constructor needed by EnableBatchLinOp + * explicit MyBatchLinOp(std::shared_ptr exec) { + * : EnableBatchLinOp(exec) {} + * // constructor needed by the factory + * explicit MyBatchLinOp(const Factory *factory, + * std::shared_ptr matrix) + * : EnableBatchLinOp(factory->get_executor()), + * matrix->get_size()), + * // store factory's parameters locally + * my_parameters_{factory->get_parameters()} + * { + * int value = my_parameters_.my_value; + * // do something with value + * } + * ``` + * + * MyBatchLinOp can then be created as follows: + * + * ```c++ + * auto exec = gko::ReferenceExecutor::create(); + * // create a factory with default `my_value` parameter + * auto fact = MyBatchLinOp::build().on(exec); + * // create a operator using the factory: + * auto my_op = fact->generate(gko::batch::matrix::Identity::create(exec, 2)); + * std::cout << my_op->get_my_parameters().my_value; // prints 5 + * + * // create a factory with custom `my_value` parameter + * auto fact = MyLinOp::build().with_my_value(0).on(exec); + * // create a operator using the factory: + * auto my_op = fact->generate(gko::batch::matrix::Identity::create(exec, 2)); + * std::cout << my_op->get_my_parameters().my_value; // prints 0 + * ``` + * + * @note It is possible to combine both the #GKO_CREATE_FACTORY_PARAMETER_*() + * macros with this one in a unique macro for class __templates__ (not with + * regular classes). Splitting this into two distinct macros allows to use them + * in all contexts. See for more + * details. 
+ * + * @param _lin_op concrete operator for which the factory is to be created + * [CRTP parameter] + * @param _parameters_name name of the parameters member in the class + * (its type is `<_parameters_name>_type`, the + * protected member's name is `<_parameters_name>_`, + * and the public getter's name is + * `get_<_parameters_name>()`) + * @param _factory_name name of the generated factory type + * + * @ingroup BatchLinOp + */ +#define GKO_ENABLE_BATCH_LIN_OP_FACTORY(_batch_lin_op, _parameters_name, \ + _factory_name) \ +public: \ + const _parameters_name##_type& get_##_parameters_name() const \ + { \ + return _parameters_name##_; \ + } \ + \ + class _factory_name \ + : public ::gko::batch::EnableDefaultBatchLinOpFactory< \ + _factory_name, _batch_lin_op, _parameters_name##_type> { \ + friend class ::gko::EnablePolymorphicObject< \ + _factory_name, ::gko::batch::BatchLinOpFactory>; \ + friend class ::gko::enable_parameters_type<_parameters_name##_type, \ + _factory_name>; \ + explicit _factory_name(std::shared_ptr exec) \ + : ::gko::batch::EnableDefaultBatchLinOpFactory< \ + _factory_name, _batch_lin_op, _parameters_name##_type>( \ + std::move(exec)) \ + {} \ + explicit _factory_name(std::shared_ptr exec, \ + const _parameters_name##_type& parameters) \ + : ::gko::batch::EnableDefaultBatchLinOpFactory< \ + _factory_name, _batch_lin_op, _parameters_name##_type>( \ + std::move(exec), parameters) \ + {} \ + }; \ + friend ::gko::batch::EnableDefaultBatchLinOpFactory< \ + _factory_name, _batch_lin_op, _parameters_name##_type>; \ + \ + \ +private: \ + _parameters_name##_type _parameters_name##_; \ + \ +public: \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + } // namespace batch } // namespace gko diff --git a/include/ginkgo/core/base/batch_lin_op_helpers.hpp b/include/ginkgo/core/base/batch_lin_op_helpers.hpp deleted file mode 100644 index 7b479192a6b..00000000000 --- a/include/ginkgo/core/base/batch_lin_op_helpers.hpp +++ /dev/null @@ -1,202 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#ifndef GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ -#define GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ - - -#include -#include -#include - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace gko { -namespace batch { - - -/** - * This is an alias for the EnableDefaultFactory mixin, which correctly sets the - * template parameters to enable a subclass of BatchLinOpFactory. - * - * @tparam ConcreteFactory the concrete factory which is being implemented - * [CRTP parameter] - * @tparam ConcreteLinOp the concrete BatchLinOp type which this factory - * produces, needs to have a constructor which takes a const ConcreteFactory *, - * and an std::shared_ptr as parameters. - * @tparam ParametersType a subclass of enable_parameters_type template which - * defines all of the parameters of the factory - * @tparam PolymorphicBase parent of ConcreteFactory in the polymorphic - * hierarchy, has to be a subclass of LinOpFactory - * - * @ingroup BatchLinOp - */ -template -using EnableDefaultBatchLinOpFactory = - EnableDefaultFactory; - - -/** - * This macro will generate a default implementation of a BatchLinOpFactory for - * the BatchLinOp subclass it is defined in. - * - * It is required to first call the macro #GKO_CREATE_FACTORY_PARAMETERS() - * before this one in order to instantiate the parameters type first. - * - * The list of parameters for the factory should be defined in a code block - * after the macro definition, and should contain a list of - * GKO_FACTORY_PARAMETER_* declarations. The class should provide a constructor - * with signature - * _batch_lin_op(const _factory_name *, std::shared_ptr) - * which the factory will use a callback to construct the object. 
- * - * A minimal example of a batch linear operator is the following: - * - * ```c++ - * struct MyBatchLinOp : public EnableBatchLinOp { - * GKO_ENABLE_BATCH_LIN_OP_FACTORY(MyBatchLinOp, my_parameters, Factory) { - * // a factory parameter named "my_value", of type int and default - * // value of 5 - * int GKO_FACTORY_PARAMETER_SCALAR(my_value, 5); - * // a factory parameter named `my_pair` of type `std::pair` - * // and default value {5, 5} - * std::pair GKO_FACTORY_PARAMETER_VECTOR(my_pair, 5, 5); - * }; - * // constructor needed by EnableBatchLinOp - * explicit MyBatchLinOp(std::shared_ptr exec) { - * : EnableBatchLinOp(exec) {} - * // constructor needed by the factory - * explicit MyBatchLinOp(const Factory *factory, - * std::shared_ptr matrix) - * : EnableBatchLinOp(factory->get_executor()), - * matrix->get_size()), - * // store factory's parameters locally - * my_parameters_{factory->get_parameters()} - * { - * int value = my_parameters_.my_value; - * // do something with value - * } - * ``` - * - * MyBatchLinOp can then be created as follows: - * - * ```c++ - * auto exec = gko::ReferenceExecutor::create(); - * // create a factory with default `my_value` parameter - * auto fact = MyBatchLinOp::build().on(exec); - * // create a operator using the factory: - * auto my_op = fact->generate(gko::batch::matrix::Identity::create(exec, 2)); - * std::cout << my_op->get_my_parameters().my_value; // prints 5 - * - * // create a factory with custom `my_value` parameter - * auto fact = MyLinOp::build().with_my_value(0).on(exec); - * // create a operator using the factory: - * auto my_op = fact->generate(gko::batch::matrix::Identity::create(exec, 2)); - * std::cout << my_op->get_my_parameters().my_value; // prints 0 - * ``` - * - * @note It is possible to combine both the #GKO_CREATE_FACTORY_PARAMETER_*() - * macros with this one in a unique macro for class __templates__ (not with - * regular classes). Splitting this into two distinct macros allows to use them - * in all contexts. See for more - * details. 
- * - * @param _lin_op concrete operator for which the factory is to be created - * [CRTP parameter] - * @param _parameters_name name of the parameters member in the class - * (its type is `<_parameters_name>_type`, the - * protected member's name is `<_parameters_name>_`, - * and the public getter's name is - * `get_<_parameters_name>()`) - * @param _factory_name name of the generated factory type - * - * @ingroup BatchLinOp - */ -#define GKO_ENABLE_BATCH_LIN_OP_FACTORY(_batch_lin_op, _parameters_name, \ - _factory_name) \ -public: \ - const _parameters_name##_type& get_##_parameters_name() const \ - { \ - return _parameters_name##_; \ - } \ - \ - class _factory_name \ - : public ::gko::batch::EnableDefaultBatchLinOpFactory< \ - _factory_name, _batch_lin_op, _parameters_name##_type> { \ - friend class ::gko::EnablePolymorphicObject< \ - _factory_name, ::gko::batch::BatchLinOpFactory>; \ - friend class ::gko::enable_parameters_type<_parameters_name##_type, \ - _factory_name>; \ - explicit _factory_name(std::shared_ptr exec) \ - : ::gko::batch::EnableDefaultBatchLinOpFactory< \ - _factory_name, _batch_lin_op, _parameters_name##_type>( \ - std::move(exec)) \ - {} \ - explicit _factory_name(std::shared_ptr exec, \ - const _parameters_name##_type& parameters) \ - : ::gko::batch::EnableDefaultBatchLinOpFactory< \ - _factory_name, _batch_lin_op, _parameters_name##_type>( \ - std::move(exec), parameters) \ - {} \ - }; \ - friend ::gko::batch::EnableDefaultBatchLinOpFactory< \ - _factory_name, _batch_lin_op, _parameters_name##_type>; \ - \ - \ -private: \ - _parameters_name##_type _parameters_name##_; \ - \ -public: \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") - - -} // namespace batch -} // namespace gko - - -#endif // GKO_PUBLIC_CORE_BASE_BATCH_LIN_OP_HELPERS_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 186a5fce061..aed3b5f3572 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -41,7 +41,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include #include @@ -56,7 +55,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #include #include From d332a0e98715a8e7381a7a80c284720ec1a7757e Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 10:22:21 +0200 Subject: [PATCH 337/583] Review updates Co-authored-by: Tobias Ribizel Co-authored-by: Yu-Hsiang Tasi --- core/test/base/batch_lin_op.cpp | 91 ++++++++++++++++++++--- include/ginkgo/core/base/batch_lin_op.hpp | 48 ++++++------ include/ginkgo/core/log/logger.hpp | 20 +++-- 3 files changed, 117 insertions(+), 42 deletions(-) diff --git a/core/test/base/batch_lin_op.cpp b/core/test/base/batch_lin_op.cpp index 2e0bf0fae0e..61dcf89f109 100644 --- a/core/test/base/batch_lin_op.cpp +++ b/core/test/base/batch_lin_op.cpp @@ -44,11 +44,36 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include namespace { +struct DummyLogger : gko::log::Logger { + DummyLogger() + : gko::log::Logger(gko::log::Logger::batch_linop_factory_events_mask) + {} + + void on_batch_linop_factory_generate_started( + const gko::batch::BatchLinOpFactory*, + const gko::batch::BatchLinOp*) const override + { + batch_linop_factory_generate_started++; + } + + void on_batch_linop_factory_generate_completed( + const gko::batch::BatchLinOpFactory*, const gko::batch::BatchLinOp*, + const gko::batch::BatchLinOp*) const override + { + batch_linop_factory_generate_completed++; + } + + int mutable batch_linop_factory_generate_started = 0; + int mutable batch_linop_factory_generate_completed = 0; +}; + + class DummyBatchLinOp : public gko::batch::EnableBatchLinOp, public gko::EnableCreateMethod { public: @@ -63,33 +88,25 @@ class EnableBatchLinOp : public ::testing::Test { protected: EnableBatchLinOp() : ref{gko::ReferenceExecutor::create()}, - ref2{gko::ReferenceExecutor::create()}, - op{DummyBatchLinOp::create(ref2, - gko::batch_dim<2>(1, gko::dim<2>{3, 5}))}, - op2{DummyBatchLinOp::create(ref2, - gko::batch_dim<2>(2, gko::dim<2>{3, 5}))} + op{DummyBatchLinOp::create(ref, + gko::batch_dim<2>(1, gko::dim<2>{3, 5}))} {} std::shared_ptr ref; - std::shared_ptr ref2; std::unique_ptr op; - std::unique_ptr op2; }; TEST_F(EnableBatchLinOp, KnowsNumBatchItems) { ASSERT_EQ(op->get_num_batch_items(), 1); - ASSERT_EQ(op2->get_num_batch_items(), 2); } TEST_F(EnableBatchLinOp, KnowsItsSizes) { auto op1_sizes = gko::batch_dim<2>(1, gko::dim<2>{3, 5}); - auto op2_sizes = gko::batch_dim<2>(2, gko::dim<2>{3, 5}); ASSERT_EQ(op->get_size(), op1_sizes); - ASSERT_EQ(op2->get_size(), op2_sizes); } @@ -123,9 +140,14 @@ class DummyBatchLinOpWithFactory class EnableBatchLinOpFactory : public ::testing::Test { protected: - EnableBatchLinOpFactory() : ref{gko::ReferenceExecutor::create()} {} + EnableBatchLinOpFactory() + : ref{gko::ReferenceExecutor::create()}, + logger{std::make_shared()} + + {} std::shared_ptr ref; + std::shared_ptr logger; }; @@ -161,4 +183,51 @@ TEST_F(EnableBatchLinOpFactory, PassesParametersToBatchLinOp) } +TEST_F(EnableBatchLinOpFactory, FactoryGenerateIsLogged) +{ + auto before_logger = *logger; + auto factory = DummyBatchLinOpWithFactory<>::build().on(ref); + factory->add_logger(logger); + factory->generate( + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 5}))); + + ASSERT_EQ(logger->batch_linop_factory_generate_started, + before_logger.batch_linop_factory_generate_started + 1); + ASSERT_EQ(logger->batch_linop_factory_generate_completed, + before_logger.batch_linop_factory_generate_completed + 1); +} + + +TEST_F(EnableBatchLinOpFactory, WithLoggersWorksAndPropagates) +{ + auto before_logger = *logger; + auto factory = + DummyBatchLinOpWithFactory<>::build().with_loggers(logger).on(ref); + auto op = factory->generate( + DummyBatchLinOp::create(ref, gko::batch_dim<2>(1, gko::dim<2>{3, 5}))); + + ASSERT_EQ(logger->batch_linop_factory_generate_started, + before_logger.batch_linop_factory_generate_started + 1); + ASSERT_EQ(logger->batch_linop_factory_generate_completed, + before_logger.batch_linop_factory_generate_completed + 1); +} + + +TEST_F(EnableBatchLinOpFactory, CopiesLinOpToOtherExecutor) +{ + auto ref2 = gko::ReferenceExecutor::create(); + auto dummy = gko::share( + DummyBatchLinOp::create(ref2, gko::batch_dim<2>(1, gko::dim<2>{3, 5}))); + auto factory = DummyBatchLinOpWithFactory<>::build().with_value(6).on(ref); + + auto op = factory->generate(dummy); + + 
ASSERT_EQ(op->get_executor(), ref); + ASSERT_EQ(op->get_parameters().value, 6); + ASSERT_EQ(op->op_->get_executor(), ref); + ASSERT_NE(op->op_.get(), dummy.get()); + ASSERT_TRUE(dynamic_cast(op->op_.get())); +} + + } // namespace diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index 31984997b2c..2e507d99a45 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -73,9 +73,13 @@ namespace batch { * * A key difference between the LinOp and the BatchLinOp class is that the apply * between BatchLinOps is no longer supported. The user can apply a BatchLinOp - * to a batch::MultiVector but not to any general BatchLinOp. Therefore, the - * BatchLinOp serves only as a base class providing necessary core functionality - * from Polymorphic object and store the dimensions of the batched object. + * to a batch::MultiVector but not to any general BatchLinOp. This apply to a + * batch::MultiVector is handled by the concrete LinOp and may be moved to the + * base BatchLinOp class in the future. + * + * Therefore, the BatchLinOp serves only as a base class providing necessary + * core functionality from Polymorphic object and store the dimensions of the + * batched object. * * @ref BatchLinOp */ @@ -84,24 +88,24 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { /** * Returns the number of items in the batch operator. * - * @return number of items in the batch operator + * @return number of items in the batch operator */ size_type get_num_batch_items() const noexcept { - return size_.get_num_batch_items(); + return get_size().get_num_batch_items(); } /** * Returns the common size of the batch items. * - * @return the common size stored + * @return the common size stored */ - dim<2> get_common_size() const { return size_.get_common_size(); } + dim<2> get_common_size() const { return get_size().get_common_size(); } /** * Returns the size of the batch operator. * - * @return size of the batch operator, a batch_dim object + * @return size of the batch operator, a batch_dim object */ const batch_dim<2>& get_size() const noexcept { return size_; } @@ -117,27 +121,28 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { * Creates a batch operator storing items of uniform sizes. * * @param exec the executor where all the operations are performed - * @param num_batch_items the number of batch items to be stored in the - * operator - * @param size the common size of the items in the batched operator + * @param batch_size the size the batched operator, as a batch_dim object */ explicit BatchLinOp(std::shared_ptr exec, - const size_type num_batch_items = 0, - const dim<2>& common_size = dim<2>{}) - : EnableAbstractPolymorphicObject(exec), - size_{num_batch_items > 0 ? batch_dim<2>(num_batch_items, common_size) - : batch_dim<2>{}} + const batch_dim<2>& batch_size) + : EnableAbstractPolymorphicObject(exec), size_{batch_size} {} /** * Creates a batch operator storing items of uniform sizes. 
* * @param exec the executor where all the operations are performed - * @param batch_size the size the batched operator, as a batch_dim object + * @param num_batch_items the number of batch items to be stored in the + * operator + * @param size the common size of the items in the batched operator */ explicit BatchLinOp(std::shared_ptr exec, - const batch_dim<2>& batch_size) - : EnableAbstractPolymorphicObject(exec), size_{batch_size} + const size_type num_batch_items = 0, + const dim<2>& common_size = dim<2>{}) + : BatchLinOp{std::move(exec), + num_batch_items > 0 + ? batch_dim<2>(num_batch_items, common_size) + : batch_dim<2>{}} {} private: @@ -158,7 +163,7 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { * --------------------------- * * ```c++ - * // Suppose A is a batch matrix, batch_b a batch rhs vector, and batch_x an + * // Suppose A is a batch matrix, batch_b, a batch rhs vector, and batch_x, an * // initial guess * // Create a BatchCG which runs for at most 1000 iterations, and stops after * // reducing the residual norm by 6 orders of magnitude @@ -234,9 +239,6 @@ class EnableBatchLinOp public: using EnablePolymorphicObject::EnablePolymorphicObject; - -protected: - GKO_ENABLE_SELF(ConcreteBatchLinOp); }; diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index a10782c0102..bef0a44c227 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -60,8 +60,12 @@ class stopping_status; namespace batch { + + class BatchLinOp; class BatchLinOpFactory; + + } // namespace batch @@ -455,9 +459,9 @@ public: \ * @warning This on_iteration_complete function that this macro declares is * deprecated. Please use the version with the stopping information. */ - [[deprecated( - "Please use the version with the additional stopping " - "information.")]] virtual void + [ + [deprecated("Please use the version with the additional stopping " + "information.")]] virtual void on_iteration_complete(const LinOp* solver, const size_type& it, const LinOp* r, const LinOp* x = nullptr, const LinOp* tau = nullptr) const @@ -476,9 +480,9 @@ public: \ * @warning This on_iteration_complete function that this macro declares is * deprecated. Please use the version with the stopping information. */ - [[deprecated( - "Please use the version with the additional stopping " - "information.")]] virtual void + [ + [deprecated("Please use the version with the additional stopping " + "information.")]] virtual void on_iteration_complete(const LinOp* solver, const size_type& it, const LinOp* r, const LinOp* x, const LinOp* tau, const LinOp* implicit_tau_sq) const @@ -810,8 +814,8 @@ class EnableLogging : public PolymorphicBase { template struct propagate_log_helper< Event, ConcreteLoggableT, - xstd::void_t< - decltype(std::declval().get_executor())>> { + xstd::void_t().get_executor())>> { template static void propagate_log(const ConcreteLoggableT* loggable, Args&&... 
args) From f11e389b3e2cc453628b9824af06faed7f63eed6 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Tue, 10 Oct 2023 08:30:45 +0000 Subject: [PATCH 338/583] Format files Co-authored-by: Pratik Nayak --- include/ginkgo/core/log/logger.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index bef0a44c227..47c03b3c572 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -459,9 +459,9 @@ public: \ * @warning This on_iteration_complete function that this macro declares is * deprecated. Please use the version with the stopping information. */ - [ - [deprecated("Please use the version with the additional stopping " - "information.")]] virtual void + [[deprecated( + "Please use the version with the additional stopping " + "information.")]] virtual void on_iteration_complete(const LinOp* solver, const size_type& it, const LinOp* r, const LinOp* x = nullptr, const LinOp* tau = nullptr) const @@ -480,9 +480,9 @@ public: \ * @warning This on_iteration_complete function that this macro declares is * deprecated. Please use the version with the stopping information. */ - [ - [deprecated("Please use the version with the additional stopping " - "information.")]] virtual void + [[deprecated( + "Please use the version with the additional stopping " + "information.")]] virtual void on_iteration_complete(const LinOp* solver, const size_type& it, const LinOp* r, const LinOp* x, const LinOp* tau, const LinOp* implicit_tau_sq) const @@ -814,8 +814,8 @@ class EnableLogging : public PolymorphicBase { template struct propagate_log_helper< Event, ConcreteLoggableT, - xstd::void_t().get_executor())>> { + xstd::void_t< + decltype(std::declval().get_executor())>> { template static void propagate_log(const ConcreteLoggableT* loggable, Args&&... args) From fb75ac39d55a0fe1818621e2390625756a0924c7 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 11:08:56 +0200 Subject: [PATCH 339/583] Add batch_linop_fac mask to logger. 
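
This adds the out-of-class definition of the new mask, matching the other
event masks already defined in logger.cpp. For illustration only (a sketch,
not part of this patch; the BatchFactoryCounter name is hypothetical), a
logger restricted to batch factory events can be written the same way as the
DummyLogger used in the batch_lin_op tests earlier in this series:

    #include <ginkgo/ginkgo.hpp>

    // Counts batch factory generate events; only the events selected by
    // batch_linop_factory_events_mask are forwarded to this logger.
    struct BatchFactoryCounter : gko::log::Logger {
        BatchFactoryCounter()
            : gko::log::Logger(
                  gko::log::Logger::batch_linop_factory_events_mask)
        {}

        void on_batch_linop_factory_generate_started(
            const gko::batch::BatchLinOpFactory*,
            const gko::batch::BatchLinOp*) const override
        {
            started++;
        }

        void on_batch_linop_factory_generate_completed(
            const gko::batch::BatchLinOpFactory*,
            const gko::batch::BatchLinOp*,
            const gko::batch::BatchLinOp*) const override
        {
            completed++;
        }

        int mutable started = 0;
        int mutable completed = 0;
    };

    // usage: factory->add_logger(std::make_shared<BatchFactoryCounter>());
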
--- core/log/logger.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/core/log/logger.cpp b/core/log/logger.cpp index 81f75842474..4b21bfe9b74 100644 --- a/core/log/logger.cpp +++ b/core/log/logger.cpp @@ -43,6 +43,7 @@ constexpr Logger::mask_type Logger::operation_events_mask; constexpr Logger::mask_type Logger::polymorphic_object_events_mask; constexpr Logger::mask_type Logger::linop_events_mask; constexpr Logger::mask_type Logger::linop_factory_events_mask; +constexpr Logger::mask_type Logger::batch_linop_factory_events_mask; constexpr Logger::mask_type Logger::criterion_events_mask; constexpr Logger::mask_type Logger::allocation_started_mask; From 0bd2b24406a7b3c8882c9f0dc50bb73f2b787903 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 11:50:39 +0200 Subject: [PATCH 340/583] Review updates Co-authored-by: Terry Cojean --- include/ginkgo/core/base/batch_lin_op.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index 2e507d99a45..320c935a54f 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -248,13 +248,13 @@ class EnableBatchLinOp * * @tparam ConcreteFactory the concrete factory which is being implemented * [CRTP parameter] - * @tparam ConcreteLinOp the concrete BatchLinOp type which this factory + * @tparam ConcreteBatchLinOp the concrete BatchLinOp type which this factory * produces, needs to have a constructor which takes a const ConcreteFactory *, * and an std::shared_ptr as parameters. * @tparam ParametersType a subclass of enable_parameters_type template which * defines all of the parameters of the factory * @tparam PolymorphicBase parent of ConcreteFactory in the polymorphic - * hierarchy, has to be a subclass of LinOpFactory + * hierarchy, has to be a subclass of BatchLinOpFactory * * @ingroup BatchLinOp */ @@ -330,8 +330,8 @@ using EnableDefaultBatchLinOpFactory = * in all contexts. See for more * details. * - * @param _lin_op concrete operator for which the factory is to be created - * [CRTP parameter] + * @param _batch_lin_op concrete operator for which the factory is to be + * created [CRTP parameter] * @param _parameters_name name of the parameters member in the class * (its type is `<_parameters_name>_type`, the * protected member's name is `<_parameters_name>_`, From b13e0df89fb8d740a894950bca63b0a898f024f7 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 16:48:19 +0200 Subject: [PATCH 341/583] Doc clarifications --- include/ginkgo/core/base/batch_lin_op.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index 320c935a54f..78ce4f4a942 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -73,14 +73,15 @@ namespace batch { * * A key difference between the LinOp and the BatchLinOp class is that the apply * between BatchLinOps is no longer supported. The user can apply a BatchLinOp - * to a batch::MultiVector but not to any general BatchLinOp. This apply to a - * batch::MultiVector is handled by the concrete LinOp and may be moved to the - * base BatchLinOp class in the future. + * to a batch::MultiVector but not to any general BatchLinOp. * * Therefore, the BatchLinOp serves only as a base class providing necessary * core functionality from Polymorphic object and store the dimensions of the * batched object. 
* + * @note Apply to batch::MultiVector objects are handled by the concrete LinOp + * and may be moved to the base BatchLinOp class in the future. + * * @ref BatchLinOp */ class BatchLinOp : public EnableAbstractPolymorphicObject { From ade5bb372bc2725d8c7fef1aefa5d8de9ec8c323 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 1 Oct 2023 14:19:44 +0200 Subject: [PATCH 342/583] Add batch dense base class, core and kernels Co-authored-by: Aditya Kashi --- core/CMakeLists.txt | 1 + core/base/batch_struct.hpp | 76 +++ core/device_hooks/common_kernels.inc.cpp | 10 + core/matrix/batch_dense.cpp | 203 ++++++++ core/matrix/batch_dense_kernels.hpp | 81 +++ core/test/matrix/batch_dense.cpp | 520 +++++++++++++++++++ cuda/CMakeLists.txt | 1 + cuda/matrix/batch_dense_kernels.cu | 90 ++++ dpcpp/CMakeLists.txt | 1 + dpcpp/matrix/batch_dense_kernels.dp.cpp | 83 +++ hip/CMakeLists.txt | 1 + hip/matrix/batch_dense_kernels.hip.cpp | 94 ++++ include/ginkgo/core/matrix/batch_dense.hpp | 341 ++++++++++++ omp/CMakeLists.txt | 1 + omp/matrix/batch_dense_kernels.cpp | 129 +++++ reference/CMakeLists.txt | 1 + reference/base/batch_struct.hpp | 28 + reference/matrix/batch_dense_kernels.cpp | 128 +++++ reference/matrix/batch_dense_kernels.hpp.inc | 88 ++++ 19 files changed, 1877 insertions(+) create mode 100644 core/matrix/batch_dense.cpp create mode 100644 core/matrix/batch_dense_kernels.hpp create mode 100644 core/test/matrix/batch_dense.cpp create mode 100644 cuda/matrix/batch_dense_kernels.cu create mode 100644 dpcpp/matrix/batch_dense_kernels.dp.cpp create mode 100644 hip/matrix/batch_dense_kernels.hip.cpp create mode 100644 include/ginkgo/core/matrix/batch_dense.hpp create mode 100644 omp/matrix/batch_dense_kernels.cpp create mode 100644 reference/matrix/batch_dense_kernels.cpp create mode 100644 reference/matrix/batch_dense_kernels.hpp.inc diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 7932976d6c9..46ea67abc65 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -39,6 +39,7 @@ target_sources(ginkgo log/vtune.cpp log/record.cpp log/stream.cpp + matrix/batch_dense.cpp matrix/coo.cpp matrix/csr.cpp matrix/dense.cpp diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index caca4577cf7..21bd5b0e8ea 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -81,6 +81,46 @@ struct uniform_batch { } // namespace multi_vector +namespace batch_dense { + + +/** + * Encapsulates one matrix from a batch of multi-vectors. + */ +template +struct batch_item { + using value_type = ValueType; + ValueType* values; + int stride; + int num_rows; + int num_rhs; +}; + + +/** + * A 'simple' structure to store a global uniform batch of multi-vectors. 
+ */ +template +struct uniform_batch { + using value_type = ValueType; + using entry_type = batch_item; + + ValueType* values; + size_type num_batch_items; + int stride; + int num_rows; + int num_rhs; + + size_type get_entry_storage() const + { + return num_rows * stride * sizeof(value_type); + } +}; + + +} // namespace batch_dense + + template GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item to_const( const multi_vector::batch_item& b) @@ -97,6 +137,22 @@ GKO_ATTRIBUTES GKO_INLINE multi_vector::uniform_batch to_const( } +template +GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item +to_const(const matrix::batch_dense::batch_item& b) +{ + return {b.values, b.stride, b.num_rows, b.num_rhs}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::uniform_batch +to_const(const matrix::batch_dense::uniform_batch& ub) +{ + return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_rhs}; +} + + /** * Extract one object (matrix, vector etc.) from a batch of objects * @@ -126,6 +182,26 @@ extract_batch_item(ValueType* const batch_values, const int stride, } +template +GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item +extract_batch_item(const matrix::batch_dense::uniform_batch& batch, + const size_type batch_idx) +{ + return {batch.values + batch_idx * batch.stride * batch.num_rows, + batch.stride, batch.num_rows, batch.num_rhs}; +} + +template +GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item +extract_batch_item(ValueType* const batch_values, const int stride, + const int num_rows, const int num_rhs, + const size_type batch_idx) +{ + return {batch_values + batch_idx * stride * num_rows, stride, num_rows, + num_rhs}; +} + + } // namespace batch } // namespace gko diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index c8bbd2e0a31..c22f5cd968d 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -299,6 +299,16 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); } // namespace batch_multi_vector +namespace batch_dense { + + +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense + + namespace dense { diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp new file mode 100644 index 00000000000..e6dedcf11fd --- /dev/null +++ b/core/matrix/batch_dense.cpp @@ -0,0 +1,203 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_dense_kernels.hpp" + + +namespace gko { +namespace batch { +namespace matrix { +namespace dense { + + +GKO_REGISTER_OPERATION(simple_apply, batch_dense::simple_apply); +GKO_REGISTER_OPERATION(advanced_apply, batch_dense::advanced_apply); + + +} // namespace dense + + +namespace detail { + + +template +batch_dim<2> compute_batch_size( + const std::vector*>& matrices) +{ + auto common_size = matrices[0]->get_size(); + for (size_type i = 1; i < matrices.size(); ++i) { + GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); + } + return batch_dim<2>{matrices.size(), common_size}; +} + + +} // namespace detail + + +template +std::unique_ptr> +BatchDense::create_view_for_item(size_type item_id) +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create( + exec, this->get_common_size(), + make_array_view(exec, num_rows * stride, + this->get_values_for_item(item_id)), + stride); + return mat; +} + + +template +std::unique_ptr> +BatchDense::create_const_view_for_item(size_type item_id) const +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create_const( + exec, this->get_common_size(), + make_const_array_view(exec, num_rows * stride, + this->get_const_values_for_item(item_id)), + stride); + return mat; +} + + +template +std::unique_ptr> +BatchDense::create_with_config_of(ptr_param other) +{ + // De-referencing `other` before calling the functions (instead of + // using operator `->`) is currently required to be compatible with + // CUDA 10.1. + // Otherwise, it results in a compile error. 
+ return (*other).create_with_same_config(); +} + + +template +void BatchDense::set_size(const batch_dim<2>& value) noexcept +{ + batch_size_ = value; +} + + +template +std::unique_ptr> +BatchDense::create_with_same_config() const +{ + return BatchDense::create(this->get_executor(), + this->get_size()); +} + + +inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) +{ + return batch_dim<2>(sizes.get_num_batch_items(), + dim<2>(1, sizes.get_common_size()[1])); +} + + +template +void BatchDense::apply_impl(const MultiVector* b, + MultiVector* x) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + GKO_ASSERT_CONFORMANT(this->get_common_size(), x->get_common_size()); + this->get_executor()->run(batch_dense::make_simple_apply(this, b, x)); +} + + +template +void BatchDense::apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + GKO_ASSERT_CONFORMANT(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(alpha->get_common_size(), gko::dim<2>(1, 1)); + GKO_ASSERT_EQUAL_COLS(beta->get_common_size(), gko::dim<2>(1, 1)); + this->get_executor()->run( + batch_dense::make_advanced_apply(alpha, this, b, beta, x)); +} + + +template +void BatchDense::convert_to( + BatchDense>* result) const +{ + result->values_ = this->values_; + result->set_size(this->get_size()); +} + + +template +void BatchDense::move_to( + BatchDense>* result) +{ + this->convert_to(result); +} + + +#define GKO_DECLARE_BATCH_DENSE_MATRIX(_type) class BatchDense<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_MATRIX); + + +} // namespace matrix +} // namespace batch +} // namespace gko diff --git a/core/matrix/batch_dense_kernels.hpp b/core/matrix/batch_dense_kernels.hpp new file mode 100644 index 00000000000..e801d7aa152 --- /dev/null +++ b/core/matrix/batch_dense_kernels.hpp @@ -0,0 +1,81 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ +#define GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ + + +#include + + +#include +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(_type) \ + void simple_apply(std::shared_ptr exec, \ + const batch::matrix::BatchDense<_type>* a, \ + const batch::MultiVector<_type>* b, \ + MultiVector<_type>* c) + +#define GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL(_type) \ + void advanced_apply(std::shared_ptr exec, \ + const batch::MultiVector<_type>* alpha, \ + const batch::matrix::BatchDense<_type>* a, \ + const batch::MultiVector<_type>* b, \ + const batch::MultiVector<_type>* beta, \ + batch::MultiVector<_type>* c) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL(ValueType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_dense, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp new file mode 100644 index 00000000000..7db7469baf6 --- /dev/null +++ b/core/test/matrix/batch_dense.cpp @@ -0,0 +1,520 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class BatchDense : public ::testing::Test { +protected: + using value_type = T; + using DenseMtx = gko::matrix::Dense; + using size_type = gko::size_type; + BatchDense() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::batch_initialize>( + std::vector{4, 3}, + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)) + {} + + + static void assert_equal_to_original_mtx( + gko::matrix::BatchDense* m) + { + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_stride().at(0), 4); + ASSERT_EQ(m->get_stride().at(1), 3); + ASSERT_EQ(m->get_num_stored_elements(), (2 * 4) + (2 * 3)); + ASSERT_EQ(m->get_num_stored_elements(0), 2 * 4); + ASSERT_EQ(m->get_num_stored_elements(1), 2 * 3); + EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{-1.5}); + EXPECT_EQ(m->at(0, 1, 1), value_type{2.5}); + ASSERT_EQ(m->at(0, 1, 2), value_type{3.5}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.5}); + EXPECT_EQ(m->at(1, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(1, 1, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{2.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); + } + + static void assert_empty(gko::matrix::BatchDense* m) + { + ASSERT_EQ(m->get_num_batch_entries(), 0); + ASSERT_EQ(m->get_num_stored_elements(), 0); + } + + std::shared_ptr exec; + std::unique_ptr> mtx; +}; + +TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); + + +TYPED_TEST(BatchDense, CanBeEmpty) +{ + auto empty = gko::matrix::BatchDense::create(this->exec); + this->assert_empty(empty.get()); +} + + +TYPED_TEST(BatchDense, ReturnsNullValuesArrayWhenEmpty) +{ + auto empty = gko::matrix::BatchDense::create(this->exec); + ASSERT_EQ(empty->get_const_values(), nullptr); +} + + +TYPED_TEST(BatchDense, CanBeConstructedWithSize) +{ + using size_type = gko::size_type; + auto m = gko::matrix::BatchDense::create( + this->exec, + std::vector>{gko::dim<2>{2, 4}, gko::dim<2>{2, 3}}); + + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 4)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 3)); + EXPECT_EQ(m->get_stride().at(0), 4); + EXPECT_EQ(m->get_stride().at(1), 3); + ASSERT_EQ(m->get_num_stored_elements(), 14); + ASSERT_EQ(m->get_num_stored_elements(0), 8); + ASSERT_EQ(m->get_num_stored_elements(1), 6); +} + + +TYPED_TEST(BatchDense, CanBeConstructedWithSizeAndStride) +{ + using size_type = gko::size_type; + auto m = gko::matrix::BatchDense::create( + this->exec, std::vector>{gko::dim<2>{2, 3}}, + std::vector{4}); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + EXPECT_EQ(m->get_stride().at(0), 4); + ASSERT_EQ(m->get_num_stored_elements(), 8); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + value_type data[] = { + 1.0, 2.0, -1.0, + 3.0, 4.0, -1.0, + 3.0, 5.0, 1.0, + 5.0, 6.0, -3.0}; + // clang-format on + + auto m = gko::matrix::BatchDense::create( + this->exec, + std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, + gko::array::view(this->exec, 12, data), + 
std::vector{3, 3}); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + const value_type data[] = { + 1.0, 2.0, -1.0, + 3.0, 4.0, -1.0, + 3.0, 5.0, 1.0, + 5.0, 6.0, -3.0}; + // clang-format on + + auto m = gko::matrix::BatchDense::create_const( + this->exec, + std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, + gko::array::const_view(this->exec, 12, data), + std::vector{3, 3}); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromBatchDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 3, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto m = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); + auto m2 = + gko::matrix::BatchDense::create(this->exec, 3, m.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto bat_m = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = + gko::matrix::BatchDense::create(this->exec, 3, mat1.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto m = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat2.get()}); + + this->assert_equal_to_original_mtx(m.get()); +} + + +TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto dense_mats = this->mtx->unbatch(); + + + GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); + GKO_ASSERT_MTX_NEAR(dense_mats[1].get(), mat2.get(), 0.); +} + + +TYPED_TEST(BatchDense, KnowsItsSizeAndValues) +{ + 
this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(BatchDense, CanBeListConstructed) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( + {{1.0, 2.0}, {1.0, 3.0}}, this->exec); + + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 4); + EXPECT_EQ(m->at(0, 0), value_type{1}); + EXPECT_EQ(m->at(0, 1), value_type{2}); + EXPECT_EQ(m->at(1, 0), value_type{1}); + EXPECT_EQ(m->at(1, 1), value_type{3}); +} + + +TYPED_TEST(BatchDense, CanBeListConstructedWithstride) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( + std::vector{2}, {{1.0, 2.0}}, this->exec); + ASSERT_EQ(m->get_num_batch_entries(), 1); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 4); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{2.0}); +} + + +TYPED_TEST(BatchDense, CanBeListConstructedByCopies) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( + 2, I({1.0, 2.0}), this->exec); + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 4); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.0}); +} + + +TYPED_TEST(BatchDense, CanBeDoubleListConstructed) +{ + using value_type = typename TestFixture::value_type; + using T = value_type; + auto m = gko::batch_initialize>( + {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, + {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, + this->exec); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); + ASSERT_EQ(m->get_stride().at(0), 3); + ASSERT_EQ(m->get_stride().at(1), 2); + EXPECT_EQ(m->get_num_stored_elements(), 15); + ASSERT_EQ(m->get_num_stored_elements(0), 9); + ASSERT_EQ(m->get_num_stored_elements(1), 6); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{1.0}); + EXPECT_EQ(m->at(0, 2), value_type{0.0}); + ASSERT_EQ(m->at(0, 3), value_type{2.0}); + EXPECT_EQ(m->at(0, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 2), value_type{3.0}); + ASSERT_EQ(m->at(1, 3), value_type{4.0}); + EXPECT_EQ(m->at(1, 4), value_type{5.0}); +} + + +TYPED_TEST(BatchDense, CanBeDoubleListConstructedWithstride) +{ + using value_type = typename TestFixture::value_type; + using T = value_type; + auto m = gko::batch_initialize>( + {4, 3}, + {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, + {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, + this->exec); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); + ASSERT_EQ(m->get_stride().at(0), 4); + ASSERT_EQ(m->get_stride().at(1), 3); + EXPECT_EQ(m->get_num_stored_elements(), 21); + ASSERT_EQ(m->get_num_stored_elements(0), 12); + ASSERT_EQ(m->get_num_stored_elements(1), 9); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{1.0}); + EXPECT_EQ(m->at(0, 2), value_type{0.0}); + ASSERT_EQ(m->at(0, 3), value_type{2.0}); + EXPECT_EQ(m->at(0, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 0), value_type{1.0}); + 
EXPECT_EQ(m->at(1, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 2), value_type{3.0}); + ASSERT_EQ(m->at(1, 3), value_type{4.0}); + EXPECT_EQ(m->at(1, 4), value_type{5.0}); +} + + +TYPED_TEST(BatchDense, CanBeCopied) +{ + auto mtx_copy = gko::matrix::BatchDense::create(this->exec); + mtx_copy->copy_from(this->mtx.get()); + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->at(0, 0, 0) = 7; + this->mtx->at(0, 1) = 7; + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(BatchDense, CanBeMoved) +{ + auto mtx_copy = gko::matrix::BatchDense::create(this->exec); + mtx_copy->copy_from(std::move(this->mtx)); + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(BatchDense, CanBeCloned) +{ + auto mtx_clone = this->mtx->clone(); + this->assert_equal_to_original_mtx( + dynamic_castmtx.get())>(mtx_clone.get())); +} + + +TYPED_TEST(BatchDense, CanBeCleared) +{ + this->mtx->clear(); + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(BatchDense, CanBeReadFromMatrixData) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::matrix::BatchDense::create(this->exec); + // clang-format off + m->read({gko::matrix_data{{2, 3}, + {{0, 0, 1.0}, + {0, 1, 3.0}, + {0, 2, 2.0}, + {1, 0, 0.0}, + {1, 1, 5.0}, + {1, 2, 0.0}}}, + gko::matrix_data{{2, 2}, + {{0, 0, -1.0}, + {0, 1, 0.5}, + {1, 0, 0.0}, + {1, 1, 9.0}}}}); + // clang-format on + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 2)); + ASSERT_EQ(m->get_num_stored_elements(), 10); + ASSERT_EQ(m->get_num_stored_elements(0), 6); + ASSERT_EQ(m->get_num_stored_elements(1), 4); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); + EXPECT_EQ(m->at(0, 1, 2), value_type{0.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); +} + + +TYPED_TEST(BatchDense, GeneratesCorrectMatrixData) +{ + using value_type = typename TestFixture::value_type; + using tpl = typename gko::matrix_data::nonzero_type; + std::vector> data; + + this->mtx->write(data); + + ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[0].nonzeros.size(), 6); + EXPECT_EQ(data[0].nonzeros[0], tpl(0, 0, value_type{-1.0})); + EXPECT_EQ(data[0].nonzeros[1], tpl(0, 1, value_type{2.0})); + EXPECT_EQ(data[0].nonzeros[2], tpl(0, 2, value_type{3.0})); + EXPECT_EQ(data[0].nonzeros[3], tpl(1, 0, value_type{-1.5})); + EXPECT_EQ(data[0].nonzeros[4], tpl(1, 1, value_type{2.5})); + EXPECT_EQ(data[0].nonzeros[5], tpl(1, 2, value_type{3.5})); + ASSERT_EQ(data[1].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[1].nonzeros.size(), 6); + EXPECT_EQ(data[1].nonzeros[0], tpl(0, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[1], tpl(0, 1, value_type{2.5})); + EXPECT_EQ(data[1].nonzeros[2], tpl(0, 2, value_type{3.0})); + EXPECT_EQ(data[1].nonzeros[3], tpl(1, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0})); + EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0})); +} + + +TYPED_TEST(BatchDense, CanBeReadFromMatrixAssemblyData) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::matrix::BatchDense::create(this->exec); + gko::matrix_assembly_data data1(gko::dim<2>{2, 3}); + data1.set_value(0, 0, 1.0); + data1.set_value(0, 1, 3.0); + 
data1.set_value(0, 2, 2.0); + data1.set_value(1, 0, 0.0); + data1.set_value(1, 1, 5.0); + data1.set_value(1, 2, 0.0); + gko::matrix_assembly_data data2(gko::dim<2>{2, 1}); + data2.set_value(0, 0, 2.0); + data2.set_value(1, 0, 5.0); + auto data = std::vector>{data1, data2}; + + m->read(data); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 8); + ASSERT_EQ(m->get_num_stored_elements(0), 6); + ASSERT_EQ(m->get_num_stored_elements(1), 2); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{0.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); + EXPECT_EQ(m->at(1, 1, 0), value_type{5.0}); +} + + +} // namespace diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 4c972d2a584..dfa1b2177ee 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -38,6 +38,7 @@ target_sources(ginkgo_cuda factorization/par_ilut_select_kernel.cu factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu + matrix/batch_dense_kernels.cu matrix/coo_kernels.cu ${CSR_INSTANTIATE} matrix/dense_kernels.cu diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu new file mode 100644 index 00000000000..5e53a410bf0 --- /dev/null +++ b/cuda/matrix/batch_dense_kernels.cu @@ -0,0 +1,90 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
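The read-from-matrix-data tests above fill each dense batch item from (row, column, value) entries. As a rough, self-contained illustration of what that amounts to for a single row-major item (the helper below is illustrative only, not the library's read implementation):

#include <cstddef>
#include <tuple>
#include <vector>

// Scatter (row, col, value) entries into a zero-initialized, row-major
// dense block, which is essentially what reading one batch item from
// matrix_data does. Illustrative helper, not library code.
std::vector<double> assemble_dense(
    std::size_t num_rows, std::size_t num_cols,
    const std::vector<std::tuple<std::size_t, std::size_t, double>>& entries)
{
    std::vector<double> values(num_rows * num_cols, 0.0);
    for (const auto& entry : entries) {
        const auto row = std::get<0>(entry);
        const auto col = std::get<1>(entry);
        values[row * num_cols + col] = std::get<2>(entry);
    }
    return values;
}

// For the first 2x3 item used in the test above,
// assemble_dense(2, 3, {{0, 0, 1.0}, {0, 1, 3.0}, {0, 2, 2.0}, {1, 1, 5.0}})
// yields the row-major values {1.0, 3.0, 2.0, 0.0, 5.0, 0.0}.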
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/cublas_bindings.hpp" +#include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" +#include "cuda/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The BatchDense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +constexpr auto default_block_size = 256; +constexpr int sm_multiplier = 4; + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + MultiVector* x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index dd0d7c4cdfb..4099bb603a3 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -35,6 +35,7 @@ target_sources(ginkgo_dpcpp factorization/par_ilut_select_kernel.dp.cpp factorization/par_ilut_spgeam_kernel.dp.cpp factorization/par_ilut_sweep_kernel.dp.cpp + matrix/batch_dense_kernels.dp.cpp matrix/coo_kernels.dp.cpp matrix/csr_kernels.dp.cpp matrix/fbcsr_kernels.dp.cpp diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp new file mode 100644 index 00000000000..100dbf7e670 --- /dev/null +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -0,0 +1,83 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The BatchDense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + MultiVector* c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 779db13d36a..21b573b6cd0 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -35,6 +35,7 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_select_kernel.hip.cpp factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp + matrix/batch_dense_kernels.hip.cpp matrix/coo_kernels.hip.cpp ${CSR_INSTANTIATE} matrix/dense_kernels.hip.cpp diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp new file mode 100644 index 00000000000..640f9c67b6a --- /dev/null +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include +#include + + +#include "core/matrix/batch_struct.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/hipblas_bindings.hip.hpp" +#include "hip/base/pointer_mode_guard.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" +#include "hip/components/uninitialized_array.hip.hpp" +#include "hip/matrix/batch_struct.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The BatchDense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +constexpr auto default_block_size = 256; +constexpr int sm_multiplier = 4; + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + MultiVector* x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp new file mode 100644 index 00000000000..60023727c8a --- /dev/null +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -0,0 +1,341 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace batch { +namespace matrix { + + +/** + * BatchDense is a batch matrix format which explicitly stores all values of the + * matrix in each of the batches. + * + * The values in each of the batches are stored in row-major format (values + * belonging to the same row appear consecutive in the memory). Optionally, rows + * can be padded for better memory access. + * + * @tparam ValueType precision of matrix elements + * + * @note While this format is not very useful for storing sparse matrices, it + * is often suitable to store vectors, and sets of vectors. + * @ingroup batch_dense + * @ingroup mat_formats + * @ingroup BatchLinOp + */ +template +class BatchDense : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class BatchDense>; + friend class BatchDense>; + +public: + using EnableBatchLinOp::convert_to; + using EnableBatchLinOp::move_to; + + using value_type = ValueType; + using index_type = int32; + using transposed_type = BatchDense; + using unbatch_type = matrix::Dense; + using absolute_type = remove_complex; + using complex_type = to_complex; + + /** + * Creates a BatchDense matrix with the configuration of another BatchDense + * matrix. + * + * @param other The other matrix whose configuration needs to copied. + */ + static std::unique_ptr create_with_config_of( + const BatchDense* other) + { + // De-referencing `other` before calling the functions (instead of + // using operator `->`) is currently required to be compatible with + // CUDA 10.1. + // Otherwise, it results in a compile error. + return (*other).create_with_same_config(); + } + + void convert_to( + BatchDense>* result) const override; + + void move_to(BatchDense>* result) override; + + + /** + * Creates a mutable view (of matrix::Dense type) of one item of the Batch + * MultiVector object. Does not perform any deep copies, but + * only returns a view of the data. + * + * @param item_id The index of the batch item + * + * @return a matrix::Dense object with the data from the batch item at the + * given index. + */ + std::unique_ptr create_view_for_item(size_type item_id); + + /** + * @copydoc create_view_for_item(size_type) + */ + std::unique_ptr create_const_view_for_item( + size_type item_id) const; + + /** + * Returns the batch size. + * + * @return the batch size + */ + batch_dim<2> get_size() const { return batch_size_; } + + /** + * Returns the number of batch items. + * + * @return the number of batch items + */ + size_type get_num_batch_items() const + { + return batch_size_.get_num_batch_items(); + } + + /** + * Returns the common size of the batch items. 
+ * + * @return the common size stored + */ + dim<2> get_common_size() const { return batch_size_.get_common_size(); } + + /** + * Returns a pointer to the array of values of the multi-vector + * + * @return the pointer to the array of values + */ + value_type* get_values() noexcept { return values_.get_data(); } + + /** + * @copydoc get_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * Returns a pointer to the array of values of the multi-vector for a + * specific batch item. + * + * @param batch_id the id of the batch item. + * + * @return the pointer to the array of values + */ + value_type* get_values_for_item(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data() + + this->get_size().get_cumulative_offset(batch_id); + } + + /** + * @copydoc get_values_for_item(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data() + + this->get_size().get_cumulative_offset(batch_id); + } + + /** + * Returns the number of elements explicitly stored in the batch matrix, + * cumulative across all the batch items. + * + * @return the number of elements explicitly stored in the vector, + * cumulative across all the batch items + */ + size_type get_num_stored_elements() const noexcept + { + return values_.get_num_elems(); + } + + + /** + * Creates a constant (immutable) batch dense matrix from a constant + * array. + * + * @param exec the executor to create the vector on + * @param size the dimensions of the vector + * @param values the value array of the vector + * + * @return A smart pointer to the constant multi-vector wrapping the input + * array (if it resides on the same executor as the vector) or a copy of the + * array on the correct executor. + */ + static std::unique_ptr> + create_const(std::shared_ptr exec, + const batch_dim<2>& sizes, + gko::detail::const_array_view&& values); + +private: + inline size_type compute_num_elems(const batch_dim<2>& size) + { + return size.get_cumulative_offset(size.get_num_batch_items()); + } + + + void apply(const MultiVector* b, + MultiVector* x) const + { + this->apply_impl(b, x); + } + + void apply(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const + { + this->apply_impl(alpha, b, beta, x); + } + +protected: + /** + * Sets the size of the MultiVector. + * + * @param value the new size of the operator + */ + void set_size(const batch_dim<2>& value) noexcept; + + /** + * Creates an uninitialized BatchDense matrix of the specified size. + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + */ + BatchDense(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}); + + /** + * Creates a BatchDense matrix from an already allocated (and initialized) + * array. 
+ * + * @tparam ValuesArray type of array of values + * + * @param exec Executor associated to the matrix + * @param size sizes of the batch matrices in a batch_dim object + * @param values array of matrix values + * @param strides stride of the rows (i.e. offset between the first + * elements of two consecutive rows, expressed as the + * number of matrix elements) + * + * @note If `values` is not an rvalue, not an array of ValueType, or is on + * the wrong executor, an internal copy will be created, and the + * original array data will not be used in the matrix. + */ + template + BatchDense(std::shared_ptr exec, const batch_dim<2>& size, + ValuesArray&& values) + : EnableBatchLinOp(exec, size), + values_{exec, std::forward(values)} + { + // Ensure that the values array has the correct size + auto num_elems = compute_num_elems(size); + GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); + } + + /** + * Creates a BatchDense matrix with the same configuration as the callers + * matrix. + * + * @returns a BatchDense matrix with the same configuration as the caller. + */ + std::unique_ptr create_with_same_config() const; + + virtual void apply_impl(const MultiVector* b, + MultiVector* x) const; + + virtual void apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const; + + size_type linearize_index(size_type batch, size_type row, + size_type col) const noexcept + { + return batch_size_.get_cumulative_offset(batch) + + row * batch_size_.get_common_size()[1] + col; + } + + size_type linearize_index(size_type batch, size_type idx) const noexcept + { + return linearize_index(batch, idx / this->get_common_size()[1], + idx % this->get_common_size()[1]); + } + +private: + batch_dim<2> batch_size_; + array values_; +}; + + +} // namespace matrix +} // namespace batch +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 47259feeac0..d87399492f5 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -23,6 +23,7 @@ target_sources(ginkgo_omp factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp + matrix/batch_dense_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..ea2e84a8e83 --- /dev/null +++ b/omp/matrix/batch_dense_kernels.cpp @@ -0,0 +1,129 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
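Since the header above stores all batch items contiguously, linearize_index and batch_dim<2>::get_cumulative_offset reduce to simple offset arithmetic once every item has the same rows-by-columns size. A small sketch of that arithmetic, assuming an unpadded row-major layout (stride equal to the number of columns):

#include <cassert>
#include <cstddef>

// Offset arithmetic behind the contiguous batch storage described above,
// assuming every batch item is stored row-major without padding.
struct batch_layout {
    std::size_t num_rows;
    std::size_t num_cols;

    // Start of batch item `batch` in the flat value array
    // (the role played by batch_dim<2>::get_cumulative_offset).
    std::size_t item_offset(std::size_t batch) const
    {
        return batch * num_rows * num_cols;
    }

    // Flat position of element (row, col) in batch item `batch`
    // (the role played by BatchDense::linearize_index).
    std::size_t linearize(std::size_t batch, std::size_t row,
                          std::size_t col) const
    {
        assert(row < num_rows && col < num_cols);
        return item_offset(batch) + row * num_cols + col;
    }
};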
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include +#include + + +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +/** + * @brief The BatchDense matrix format namespace. + * @ref BatchDense + * @ingroup batch_dense + */ +namespace batch_dense { + + +#include "reference/matrix/batch_dense_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* c) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); + if (alpha->get_num_batch_items() > 1) { + GKO_ASSERT(alpha->get_num_batch_items() == x->get_num_batch_items()); + GKO_ASSERT(beta->get_num_batch_items() == x->get_num_batch_items()); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } + } else { + const auto alpha_item = batch::extract_batch_item(alpha_ub, 0); + const auto beta_item = batch::extract_batch_item(beta_ub, 0); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = 
batch::extract_batch_item(x_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index dd54e3fb52f..37498588ca7 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -25,6 +25,7 @@ target_sources(ginkgo_reference factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp + matrix/batch_dense_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index ce7c7af5605..b30fa971ed7 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -87,6 +87,34 @@ inline batch::multi_vector::uniform_batch get_batch_struct( } +/** + * Generates an immutable uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch +get_batch_struct(const batch::matrix::BatchDense* const op) +{ + return {op->get_const_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch get_batch_struct( + batch::matrix::BatchDense* const op) +{ + return {op->get_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + } // namespace host } // namespace kernels } // namespace gko diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..aa285a6b01b --- /dev/null +++ b/reference/matrix/batch_dense_kernels.cpp @@ -0,0 +1,128 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include "core/matrix/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The BatchDense matrix format namespace. + * @ref BatchDense + * @ingroup batch_dense + */ +namespace batch_dense { + + +#include "reference/matrix/batch_dense_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* c) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); + if (alpha->get_num_batch_items() > 1) { + GKO_ASSERT(alpha->get_num_batch_items() == x->get_num_batch_items()); + GKO_ASSERT(beta->get_num_batch_items() == x->get_num_batch_items()); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } + } else { + const auto alpha_item = batch::extract_batch_item(alpha_ub, 0); + const auto beta_item = batch::extract_batch_item(beta_ub, 0); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace reference +} 
// namespace kernels +} // namespace gko diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc new file mode 100644 index 00000000000..ae342982de5 --- /dev/null +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -0,0 +1,88 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +template +inline void simple_apply_kernel( + const gko::batch::batch_dense::batch_item& a, + const gko::batch::batch_multi_vector::batch_item& b, + const gko::batch::batch_multi_vector::batch_item& c) +{ + for (int row = 0; row < c.num_rows; ++row) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] = gko::zero(); + } + } + + for (int row = 0; row < c.num_rows; ++row) { + for (int inner = 0; inner < a.num_rhs; ++inner) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] += + a.values[row * a.stride + inner] * + b.values[inner * b.stride + col]; + } + } + } +} + + +template +inline void advanced_apply_kernel( + const ValueType alpha, + const gko::batch::batch_dense::batch_item& a, + const gko::batch::batch_multi_vector::batch_item& b, + const ValueType beta, + const gko::batch::batch_multi_vector::batch_item& c) +{ + if (beta != gko::zero()) { + for (int row = 0; row < c.num_rows; ++row) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] *= beta; + } + } + } else { + for (int row = 0; row < c.num_rows; ++row) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] *= gko::zero(); + } + } + } + + for (int row = 0; row < c.num_rows; ++row) { + for (int inner = 0; inner < a.num_rhs; ++inner) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] += + alpha * a.values[row * a.stride + inner] * + b.values[inner * b.stride + col]; + } + } + } +} From 4f8c875efa68d4f12641d10f397f4734b0216794 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 2 Oct 2023 19:18:05 +0200 Subject: [PATCH 343/583] add reference kernels WIP --- core/base/batch_multi_vector_kernels.hpp | 1 - core/base/batch_struct.hpp | 76 ----------- core/device_hooks/common_kernels.inc.cpp | 1 + core/matrix/batch_dense.cpp | 10 +- core/matrix/batch_dense_kernels.hpp | 5 +- core/matrix/batch_struct.hpp | 125 +++++++++++++++++++ cuda/matrix/batch_dense_kernels.cu | 3 +- dpcpp/matrix/batch_dense_kernels.dp.cpp | 2 +- hip/matrix/batch_dense_kernels.hip.cpp | 2 +- include/ginkgo/core/matrix/batch_dense.hpp | 10 +- omp/matrix/batch_dense_kernels.cpp | 4 +- reference/base/batch_struct.hpp | 28 ----- reference/matrix/batch_dense_kernels.cpp | 16 ++- reference/matrix/batch_dense_kernels.hpp.inc | 12 +- reference/matrix/batch_struct.hpp | 95 ++++++++++++++ 15 files changed, 259 insertions(+), 131 deletions(-) create mode 100644 core/matrix/batch_struct.hpp create mode 100644 reference/matrix/batch_struct.hpp diff --git a/core/base/batch_multi_vector_kernels.hpp b/core/base/batch_multi_vector_kernels.hpp index 8603a2b9055..5a39567f470 100644 --- a/core/base/batch_multi_vector_kernels.hpp +++ b/core/base/batch_multi_vector_kernels.hpp @@ -39,7 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include #include "core/base/kernel_declaration.hpp" diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index 21bd5b0e8ea..caca4577cf7 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -81,46 +81,6 @@ struct uniform_batch { } // namespace multi_vector -namespace batch_dense { - - -/** - * Encapsulates one matrix from a batch of multi-vectors. - */ -template -struct batch_item { - using value_type = ValueType; - ValueType* values; - int stride; - int num_rows; - int num_rhs; -}; - - -/** - * A 'simple' structure to store a global uniform batch of multi-vectors. 
- */ -template -struct uniform_batch { - using value_type = ValueType; - using entry_type = batch_item; - - ValueType* values; - size_type num_batch_items; - int stride; - int num_rows; - int num_rhs; - - size_type get_entry_storage() const - { - return num_rows * stride * sizeof(value_type); - } -}; - - -} // namespace batch_dense - - template GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item to_const( const multi_vector::batch_item& b) @@ -137,22 +97,6 @@ GKO_ATTRIBUTES GKO_INLINE multi_vector::uniform_batch to_const( } -template -GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item -to_const(const matrix::batch_dense::batch_item& b) -{ - return {b.values, b.stride, b.num_rows, b.num_rhs}; -} - - -template -GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::uniform_batch -to_const(const matrix::batch_dense::uniform_batch& ub) -{ - return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_rhs}; -} - - /** * Extract one object (matrix, vector etc.) from a batch of objects * @@ -182,26 +126,6 @@ extract_batch_item(ValueType* const batch_values, const int stride, } -template -GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item -extract_batch_item(const matrix::batch_dense::uniform_batch& batch, - const size_type batch_idx) -{ - return {batch.values + batch_idx * batch.stride * batch.num_rows, - batch.stride, batch.num_rows, batch.num_rhs}; -} - -template -GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item -extract_batch_item(ValueType* const batch_values, const int stride, - const int num_rows, const int num_rhs, - const size_type batch_idx) -{ - return {batch_values + batch_idx * stride * num_rows, stride, num_rows, - num_rhs}; -} - - } // namespace batch } // namespace gko diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index c22f5cd968d..87cab3dcf0b 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -57,6 +57,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/factorization/par_ict_kernels.hpp" #include "core/factorization/par_ilu_kernels.hpp" #include "core/factorization/par_ilut_kernels.hpp" +#include "core/matrix/batch_dense_kernels.hpp" #include "core/matrix/coo_kernels.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/matrix/dense_kernels.hpp" diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index e6dedcf11fd..803f7a51c50 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -43,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include "core/matrix/batch_dense_kernels.hpp" @@ -66,7 +67,7 @@ namespace detail { template batch_dim<2> compute_batch_size( - const std::vector*>& matrices) + const std::vector*>& matrices) { auto common_size = matrices[0]->get_size(); for (size_type i = 1; i < matrices.size(); ++i) { @@ -80,7 +81,7 @@ batch_dim<2> compute_batch_size( template -std::unique_ptr> +std::unique_ptr> BatchDense::create_view_for_item(size_type item_id) { auto exec = this->get_executor(); @@ -96,7 +97,7 @@ BatchDense::create_view_for_item(size_type item_id) template -std::unique_ptr> +std::unique_ptr> BatchDense::create_const_view_for_item(size_type item_id) const { auto exec = this->get_executor(); @@ -113,7 +114,8 @@ BatchDense::create_const_view_for_item(size_type item_id) const template std::unique_ptr> -BatchDense::create_with_config_of(ptr_param other) +BatchDense::create_with_config_of( + ptr_param> other) { // De-referencing `other` before calling the functions (instead of // using operator `->`) is currently required to be compatible with diff --git a/core/matrix/batch_dense_kernels.hpp b/core/matrix/batch_dense_kernels.hpp index e801d7aa152..7f814e08b50 100644 --- a/core/matrix/batch_dense_kernels.hpp +++ b/core/matrix/batch_dense_kernels.hpp @@ -42,6 +42,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/base/kernel_declaration.hpp" + + namespace gko { namespace kernels { @@ -50,7 +53,7 @@ namespace kernels { void simple_apply(std::shared_ptr exec, \ const batch::matrix::BatchDense<_type>* a, \ const batch::MultiVector<_type>* b, \ - MultiVector<_type>* c) + batch::MultiVector<_type>* c) #define GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL(_type) \ void advanced_apply(std::shared_ptr exec, \ diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp new file mode 100644 index 00000000000..b6926b0894d --- /dev/null +++ b/core/matrix/batch_struct.hpp @@ -0,0 +1,125 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include +#include + + +namespace gko { +namespace batch { +namespace matrix { +namespace batch_dense { + + +/** + * Encapsulates one matrix from a batch of multi-vectors. + */ +template +struct batch_item { + using value_type = ValueType; + ValueType* values; + int stride; + int num_rows; + int num_rhs; +}; + + +/** + * A 'simple' structure to store a global uniform batch of multi-vectors. + */ +template +struct uniform_batch { + using value_type = ValueType; + using entry_type = batch_item; + + ValueType* values; + size_type num_batch_items; + int stride; + int num_rows; + int num_rhs; + + size_type get_entry_storage() const + { + return num_rows * stride * sizeof(value_type); + } +}; + + +} // namespace batch_dense + + +template +GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item to_const( + const batch_dense::batch_item& b) +{ + return {b.values, b.stride, b.num_rows, b.num_rhs}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE batch_dense::uniform_batch to_const( + const batch_dense::uniform_batch& ub) +{ + return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_rhs}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( + const batch_dense::uniform_batch& batch, + const size_type batch_idx) +{ + return {batch.values + batch_idx * batch.stride * batch.num_rows, + batch.stride, batch.num_rows, batch.num_rhs}; +} + +template +GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( + ValueType* const batch_values, const int stride, const int num_rows, + const int num_rhs, const size_type batch_idx) +{ + return {batch_values + batch_idx * stride * num_rows, stride, num_rows, + num_rhs}; +} + + +} // namespace matrix +} // namespace batch +} // namespace gko + + +#endif // GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index 5e53a410bf0..4615af581f5 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" @@ -66,7 +67,7 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::BatchDense* mat, const batch::MultiVector* b, - MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 100dbf7e670..964bf094077 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -59,7 +59,7 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::BatchDense* a, const batch::MultiVector* b, - MultiVector* c) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index 640f9c67b6a..93570388d50 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -70,7 +70,7 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::BatchDense* mat, const batch::MultiVector* b, - MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 60023727c8a..47aff35b7e7 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include @@ -85,7 +86,7 @@ class BatchDense : public EnableBatchLinOp>, using value_type = ValueType; using index_type = int32; using transposed_type = BatchDense; - using unbatch_type = matrix::Dense; + using unbatch_type = gko::matrix::Dense; using absolute_type = remove_complex; using complex_type = to_complex; @@ -227,10 +228,9 @@ class BatchDense : public EnableBatchLinOp>, * array (if it resides on the same executor as the vector) or a copy of the * array on the correct executor. */ - static std::unique_ptr> - create_const(std::shared_ptr exec, - const batch_dim<2>& sizes, - gko::detail::const_array_view&& values); + static std::unique_ptr> create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + gko::detail::const_array_view&& values); private: inline size_type compute_num_elems(const batch_dim<2>& size) diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp index ea2e84a8e83..fe742bee402 100644 --- a/omp/matrix/batch_dense_kernels.cpp +++ b/omp/matrix/batch_dense_kernels.cpp @@ -40,6 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" #include "reference/matrix/batch_struct.hpp" @@ -61,7 +63,7 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::BatchDense* mat, const batch::MultiVector* b, - MultiVector* x) + batch::MultiVector* x) { const auto b_ub = host::get_batch_struct(b); const auto x_ub = host::get_batch_struct(x); diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index b30fa971ed7..ce7c7af5605 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -87,34 +87,6 @@ inline batch::multi_vector::uniform_batch get_batch_struct( } -/** - * Generates an immutable uniform batch struct from a batch of multi-vectors. - */ -template -inline batch::matrix::batch_dense::uniform_batch -get_batch_struct(const batch::matrix::BatchDense* const op) -{ - return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; -} - - -/** - * Generates a uniform batch struct from a batch of multi-vectors. - */ -template -inline batch::matrix::batch_dense::uniform_batch get_batch_struct( - batch::matrix::BatchDense* const op) -{ - return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; -} - - } // namespace host } // namespace kernels } // namespace gko diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp index aa285a6b01b..bb5f3e18df7 100644 --- a/reference/matrix/batch_dense_kernels.cpp +++ b/reference/matrix/batch_dense_kernels.cpp @@ -41,7 +41,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" #include "reference/matrix/batch_struct.hpp" @@ -63,13 +65,13 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::BatchDense* mat, const batch::MultiVector* b, - MultiVector* x) + batch::MultiVector* x) { const auto b_ub = host::get_batch_struct(b); const auto x_ub = host::get_batch_struct(x); const auto mat_ub = host::get_batch_struct(mat); for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); const auto b_item = batch::extract_batch_item(b_ub, batch); const auto x_item = batch::extract_batch_item(x_ub, batch); simple_apply_kernel(mat_item, b_item, x_item); @@ -83,10 +85,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* a, + const batch::matrix::BatchDense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, - batch::MultiVector* c) + batch::MultiVector* x) { const auto b_ub = host::get_batch_struct(b); const auto x_ub = host::get_batch_struct(x); @@ -97,7 +99,8 @@ void advanced_apply(std::shared_ptr exec, GKO_ASSERT(alpha->get_num_batch_items() == x->get_num_batch_items()); GKO_ASSERT(beta->get_num_batch_items() == x->get_num_batch_items()); for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto mat_item = + batch::matrix::extract_batch_item(mat_ub, batch); const auto b_item = batch::extract_batch_item(b_ub, batch); const auto x_item = batch::extract_batch_item(x_ub, batch); const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); @@ -109,7 +112,8 @@ void advanced_apply(std::shared_ptr exec, const auto alpha_item = batch::extract_batch_item(alpha_ub, 0); const auto beta_item = batch::extract_batch_item(beta_ub, 0); for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto mat_item = + batch::matrix::extract_batch_item(mat_ub, batch); const auto b_item = batch::extract_batch_item(b_ub, batch); const auto x_item = batch::extract_batch_item(x_ub, batch); advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc index ae342982de5..d45183b2faa 100644 --- a/reference/matrix/batch_dense_kernels.hpp.inc +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -32,9 +32,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template inline void simple_apply_kernel( - const gko::batch::batch_dense::batch_item& a, - const gko::batch::batch_multi_vector::batch_item& b, - const gko::batch::batch_multi_vector::batch_item& c) + const gko::batch::matrix::batch_dense::batch_item& a, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& c) { for (int row = 0; row < c.num_rows; ++row) { for (int col = 0; col < c.num_rhs; ++col) { @@ -57,10 +57,10 @@ inline void simple_apply_kernel( template inline void advanced_apply_kernel( const ValueType alpha, - const gko::batch::batch_dense::batch_item& a, - const gko::batch::batch_multi_vector::batch_item& b, + const gko::batch::matrix::batch_dense::batch_item& a, + const gko::batch::multi_vector::batch_item& b, const ValueType beta, - const gko::batch::batch_multi_vector::batch_item& c) + const gko::batch::multi_vector::batch_item& c) { if (beta != gko::zero()) { for (int row = 0; row < c.num_rows; ++row) { diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp new file mode 100644 index 00000000000..1bed5a4e5c9 --- /dev/null +++ b/reference/matrix/batch_struct.hpp @@ -0,0 +1,95 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include + + +#include "core/base/batch_struct.hpp" + + +namespace gko { +namespace kernels { +/** + * @brief A namespace for shared functionality between omp and reference + * executors. + */ +namespace host { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of multi-vectors. 
+ */ +template +inline batch::matrix::batch_dense::uniform_batch +get_batch_struct(const batch::matrix::BatchDense* const op) +{ + return {op->get_const_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch get_batch_struct( + batch::matrix::BatchDense* const op) +{ + return {op->get_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace host +} // namespace kernels +} // namespace gko + + +#endif // GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ From 5bdedb16c86932fdc53760668e503ee97a1161d8 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 3 Oct 2023 11:31:51 +0200 Subject: [PATCH 344/583] Generalize batch utilities --- core/base/batch_multi_vector.cpp | 28 ++++++++-- core/base/batch_utilities.hpp | 47 ++++++++-------- core/test/base/batch_multi_vector.cpp | 54 +++++++++++-------- core/test/utils/assertions.hpp | 8 ++- .../test/base/batch_multi_vector_kernels.cpp | 43 ++++++++------- 5 files changed, 109 insertions(+), 71 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 23591cd1ffe..f6884ef523b 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include "core/base/batch_multi_vector_kernels.hpp" @@ -72,7 +73,7 @@ namespace detail { template batch_dim<2> compute_batch_size( - const std::vector*>& matrices) + const std::vector*>& matrices) { auto common_size = matrices[0]->get_size(); for (size_type i = 1; i < matrices.size(); ++i) { @@ -86,7 +87,7 @@ batch_dim<2> compute_batch_size( template -std::unique_ptr> +std::unique_ptr> MultiVector::create_view_for_item(size_type item_id) { auto exec = this->get_executor(); @@ -102,7 +103,7 @@ MultiVector::create_view_for_item(size_type item_id) template -std::unique_ptr> +std::unique_ptr> MultiVector::create_const_view_for_item(size_type item_id) const { auto exec = this->get_executor(); @@ -290,6 +291,27 @@ void MultiVector::move_to( } +template +void MultiVector::convert_to( + matrix::BatchDense* result) const +{ + auto exec = result->get_executor() != nullptr ? result->get_executor() + : this->get_executor(); + auto tmp = gko::batch::matrix::BatchDense::create_const( + exec, this->get_size(), + make_const_array_view(exec, this->get_num_stored_elements(), + this->get_const_values())); + result->copy_from(tmp); +} + + +template +void MultiVector::move_to(matrix::BatchDense* result) +{ + this->convert_to(result); +} + + #define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class MultiVector<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR); diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index e5dc22faeda..d5c5bdb4aa2 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -51,16 +51,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { namespace batch { -namespace multivector { -template -std::unique_ptr> duplicate( - std::shared_ptr exec, size_type num_duplications, - const batch::MultiVector* input) +template +std::unique_ptr duplicate(std::shared_ptr exec, + size_type num_duplications, + const OutputType* input) { auto num_batch_items = input->get_num_batch_items(); - auto tmp = batch::MultiVector::create( + auto tmp = OutputType::create( exec, batch_dim<2>(num_batch_items * num_duplications, input->get_common_size())); @@ -75,13 +74,13 @@ std::unique_ptr> duplicate( } -template -std::unique_ptr> create_from_dense( +template +std::unique_ptr create_from_item( std::shared_ptr exec, const size_type num_duplications, - const matrix::Dense* input) + const typename OutputType::unbatch_type* input) { auto num_batch_items = num_duplications; - auto tmp = batch::MultiVector::create( + auto tmp = OutputType::create( exec, batch_dim<2>(num_batch_items, input->get_size())); for (size_type b = 0; b < num_batch_items; ++b) { @@ -92,13 +91,13 @@ std::unique_ptr> create_from_dense( } -template -std::unique_ptr> create_from_dense( +template +std::unique_ptr create_from_item( std::shared_ptr exec, - const std::vector*>& input) + const std::vector& input) { auto num_batch_items = input.size(); - auto tmp = batch::MultiVector::create( + auto tmp = OutputType::create( exec, batch_dim<2>(num_batch_items, input[0]->get_size())); for (size_type b = 0; b < num_batch_items; ++b) { @@ -109,13 +108,12 @@ std::unique_ptr> create_from_dense( } -template -std::vector>> unbatch( - const batch::MultiVector* batch_multivec) +template +auto unbatch(const InputType* batch_multivec) { auto exec = batch_multivec->get_executor(); auto unbatched_mats = - std::vector>>{}; + std::vector>{}; for (size_type b = 0; b < batch_multivec->get_num_batch_items(); ++b) { unbatched_mats.emplace_back( batch_multivec->create_const_view_for_item(b)->clone()); @@ -124,14 +122,14 @@ std::vector>> unbatch( } -template -std::unique_ptr> read( +template +std::unique_ptr read( std::shared_ptr exec, const std::vector>& data) { auto num_batch_items = data.size(); - auto tmp = MultiVector::create( - exec, batch_dim<2>(num_batch_items, data[0].size)); + auto tmp = + OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size)); for (size_type b = 0; b < num_batch_items; ++b) { tmp->create_view_for_item(b)->read(data[b]); @@ -141,9 +139,9 @@ std::unique_ptr> read( } -template +template std::vector> write( - const MultiVector* mvec) + const OutputType* mvec) { auto data = std::vector>( mvec->get_num_batch_items()); @@ -157,7 +155,6 @@ std::vector> write( } -} // namespace multivector } // namespace batch } // namespace gko diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 85168a406cc..7bdaec30b27 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -188,11 +188,11 @@ TYPED_TEST(MultiVector, CanBeConstructedFromExistingData) using size_type = gko::size_type; // clang-format off value_type data[] = { - 1.0, 2.0, - -1.0,3.0, + 1.0, 2.0, + -1.0, 3.0, 4.0, -1.0, - 3.0, 5.0, - 1.0, 5.0, + 3.0, 5.0, + 1.0, 5.0, 6.0, -3.0}; // clang-format on @@ -218,11 +218,11 @@ TYPED_TEST(MultiVector, CanBeConstructedFromExistingConstData) using size_type = gko::size_type; // clang-format off value_type data[] = { - 1.0, 2.0, - -1.0,3.0, + 1.0, 2.0, + -1.0, 3.0, 4.0, -1.0, - 3.0, 5.0, - 1.0, 5.0, + 3.0, 5.0, + 1.0, 5.0, 6.0, -3.0}; // clang-format on @@ -252,7 +252,7 @@ 
TYPED_TEST(MultiVector, CanBeConstructedFromDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::multivector::create_from_dense( + auto m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); @@ -269,10 +269,12 @@ TYPED_TEST(MultiVector, CanBeConstructedFromDenseMatricesByDuplication) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::batch::multivector::create_from_dense( - this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); - auto m = - gko::batch::multivector::create_from_dense(this->exec, 3, mat1.get()); + auto bat_m = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = gko::batch::create_from_item>( + this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } @@ -287,14 +289,16 @@ TYPED_TEST(MultiVector, CanBeConstructedByDuplicatingMultiVectors) this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::multivector::create_from_dense( + auto m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::batch::multivector::create_from_dense( - this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), - mat2.get(), mat1.get(), mat2.get()}); + auto m_ref = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); - auto m2 = - gko::batch::multivector::duplicate(this->exec, 3, m.get()); + auto m2 = gko::batch::duplicate>( + this->exec, 3, m.get()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } @@ -385,7 +389,8 @@ TYPED_TEST(MultiVector, CanBeUnbatchedIntoDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto dense_mats = gko::batch::multivector::unbatch(this->mtx.get()); + auto dense_mats = gko::batch::unbatch>( + this->mtx.get()); ASSERT_EQ(dense_mats.size(), 2); GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); @@ -404,7 +409,8 @@ TYPED_TEST(MultiVector, CanBeReadFromMatrixData) vec_data.emplace_back(gko::matrix_data( {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); - auto m = gko::batch::multivector::read(this->exec, + auto m = gko::batch::read>(this->exec, vec_data); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); @@ -429,7 +435,8 @@ TYPED_TEST(MultiVector, CanBeReadFromSparseMatrixData) vec_data.emplace_back(gko::matrix_data( {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); - auto m = gko::batch::multivector::read(this->exec, + auto m = gko::batch::read>(this->exec, vec_data); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); @@ -451,7 +458,8 @@ TYPED_TEST(MultiVector, GeneratesCorrectMatrixData) using tpl = typename gko::matrix_data::nonzero_type; auto data = - gko::batch::multivector::write(this->mtx.get()); + gko::batch::write>(this->mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); ASSERT_EQ(data[0].nonzeros.size(), 6); diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index d723d5a8964..63ed1e5423a 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -720,8 +720,12 @@ ::testing::AssertionResult batch_matrices_near( using value_type1 = typename Mat1::value_type; using value_type2 = typename Mat2::value_type; - auto first_data = gko::batch::multivector::write(first); - auto second_data = 
gko::batch::multivector::write(second); + auto first_data = + gko::batch::write>(first); + auto second_data = + gko::batch::write>(second); if (first_data.size() != second_data.size()) { return ::testing::AssertionFailure() diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index 4f922c37703..e0c7643c8d7 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -137,13 +137,14 @@ TYPED_TEST(MultiVector, ScalesData) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, 1.5}}, {{3.0, -1.0, 0.25}}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_0->scale(alpha.get()); this->mtx_00->scale(ualpha[0].get()); this->mtx_01->scale(ualpha[1].get()); - auto res = gko::batch::multivector::unbatch(this->mtx_0.get()); + auto res = + gko::batch::unbatch>(this->mtx_0.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_01.get(), 0.); } @@ -154,13 +155,14 @@ TYPED_TEST(MultiVector, ScalesDataWithScalar) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize({{2.0}, {-2.0}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->scale(alpha.get()); this->mtx_10->scale(ualpha[0].get()); this->mtx_11->scale(ualpha[1].get()); - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -172,13 +174,14 @@ TYPED_TEST(MultiVector, ScalesDataWithMultipleScalars) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, -1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->scale(alpha.get()); this->mtx_10->scale(ualpha[0].get()); this->mtx_11->scale(ualpha[1].get()); - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -190,13 +193,14 @@ TYPED_TEST(MultiVector, AddsScaled) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -207,13 +211,14 @@ TYPED_TEST(MultiVector, AddsScaledWithScalar) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize({{2.0}, {-2.0}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = 
gko::batch::unbatch>(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -236,13 +241,13 @@ TYPED_TEST(MultiVector, ComputesDot) using T = typename TestFixture::value_type; auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - auto ures = gko::batch::multivector::unbatch(result.get()); + auto ures = gko::batch::unbatch>(result.get()); this->mtx_0->compute_dot(this->mtx_1.get(), result.get()); this->mtx_00->compute_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_dot(this->mtx_11.get(), ures[1].get()); - auto res = gko::batch::multivector::unbatch(result.get()); + auto res = gko::batch::unbatch>(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); } @@ -277,13 +282,13 @@ TYPED_TEST(MultiVector, ComputesConjDot) using T = typename TestFixture::value_type; auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - auto ures = gko::batch::multivector::unbatch(result.get()); + auto ures = gko::batch::unbatch>(result.get()); this->mtx_0->compute_conj_dot(this->mtx_1.get(), result.get()); this->mtx_00->compute_conj_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_conj_dot(this->mtx_11.get(), ures[1].get()); - auto res = gko::batch::multivector::unbatch(result.get()); + auto res = gko::batch::unbatch>(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); } @@ -359,8 +364,9 @@ TYPED_TEST(MultiVector, ConvertsToPrecision) this->mtx_1->convert_to(tmp.get()); tmp->convert_to(res.get()); - auto ures = gko::batch::multivector::unbatch(res.get()); - auto umtx = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto ures = gko::batch::unbatch>(res.get()); + auto umtx = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(umtx[0].get(), ures[0].get(), residual); GKO_ASSERT_MTX_NEAR(umtx[1].get(), ures[1].get(), residual); } @@ -382,8 +388,9 @@ TYPED_TEST(MultiVector, MovesToPrecision) this->mtx_1->move_to(tmp.get()); tmp->move_to(res.get()); - auto ures = gko::batch::multivector::unbatch(res.get()); - auto umtx = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto ures = gko::batch::unbatch>(res.get()); + auto umtx = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(umtx[0].get(), ures[0].get(), residual); GKO_ASSERT_MTX_NEAR(umtx[1].get(), ures[1].get(), residual); } From 669cc19753275db1827b34bdd5c9744901331c37 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 3 Oct 2023 11:32:30 +0200 Subject: [PATCH 345/583] MultiVector to BatchDense conversion --- core/matrix/batch_dense.cpp | 27 ++- core/test/matrix/CMakeLists.txt | 2 + core/test/matrix/batch_dense.cpp | 222 ++++++++---------- .../ginkgo/core/base/batch_multi_vector.hpp | 25 +- include/ginkgo/core/matrix/batch_dense.hpp | 11 +- 5 files changed, 154 insertions(+), 133 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 803f7a51c50..9f72a26c488 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -53,12 +53,14 @@ namespace gko { namespace batch { namespace 
matrix { namespace dense { +namespace { GKO_REGISTER_OPERATION(simple_apply, batch_dense::simple_apply); GKO_REGISTER_OPERATION(advanced_apply, batch_dense::advanced_apply); +} // namespace } // namespace dense @@ -141,6 +143,19 @@ BatchDense::create_with_same_config() const } +template +std::unique_ptr> +BatchDense::create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + gko::detail::const_array_view&& values) +{ + // cast const-ness away, but return a const object afterwards, + // so we can ensure that no modifications take place. + return std::unique_ptr(new BatchDense{ + exec, sizes, gko::detail::array_const_cast(std::move(values))}); +} + + inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) { return batch_dim<2>(sizes.get_num_batch_items(), @@ -148,6 +163,14 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) } +template +BatchDense::BatchDense(std::shared_ptr exec, + const batch_dim<2>& size) + : EnableBatchLinOp>(exec, size), + values_(exec, compute_num_elems(size)) +{} + + template void BatchDense::apply_impl(const MultiVector* b, MultiVector* x) const @@ -157,7 +180,7 @@ void BatchDense::apply_impl(const MultiVector* b, GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); GKO_ASSERT_CONFORMANT(this->get_common_size(), x->get_common_size()); - this->get_executor()->run(batch_dense::make_simple_apply(this, b, x)); + this->get_executor()->run(dense::make_simple_apply(this, b, x)); } @@ -175,7 +198,7 @@ void BatchDense::apply_impl(const MultiVector* alpha, GKO_ASSERT_EQUAL_COLS(alpha->get_common_size(), gko::dim<2>(1, 1)); GKO_ASSERT_EQUAL_COLS(beta->get_common_size(), gko::dim<2>(1, 1)); this->get_executor()->run( - batch_dense::make_advanced_apply(alpha, this, b, beta, x)); + dense::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 433361a054f..57c2c97e355 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -1,3 +1,5 @@ +# ginkgo_create_test(batch_dense) +# ginkgo_create_test(coo) ginkgo_create_test(coo_builder) ginkgo_create_test(csr) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 7db7469baf6..a1ebdb1061c 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -44,9 +44,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/test/utils.hpp" -namespace { - - template class BatchDense : public ::testing::Test { protected: @@ -55,11 +52,13 @@ class BatchDense : public ::testing::Test { using size_type = gko::size_type; BatchDense() : exec(gko::ReferenceExecutor::create()), - mtx(gko::batch_initialize>( - std::vector{4, 3}, + mtx(gko::batch::initialize< + gko::batch::matrix::BatchDense>( {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, - exec)) + exec)), + dense_mtx(gko::initialize>( + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)) {} @@ -67,13 +66,8 @@ class BatchDense : public ::testing::Test { gko::matrix::BatchDense* m) { ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_stride().at(0), 4); - ASSERT_EQ(m->get_stride().at(1), 3); - ASSERT_EQ(m->get_num_stored_elements(), (2 * 4) + (2 * 3)); - ASSERT_EQ(m->get_num_stored_elements(0), 2 * 4); - ASSERT_EQ(m->get_num_stored_elements(1), 2 * 3); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 4)); EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); @@ -95,7 +89,7 @@ class BatchDense : public ::testing::Test { } std::shared_ptr exec; - std::unique_ptr> mtx; + std::unique_ptr> mtx; }; TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); @@ -103,46 +97,85 @@ TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); TYPED_TEST(BatchDense, CanBeEmpty) { - auto empty = gko::matrix::BatchDense::create(this->exec); + auto empty = gko::batch::matrix::BatchDense::create(this->exec); this->assert_empty(empty.get()); } TYPED_TEST(BatchDense, ReturnsNullValuesArrayWhenEmpty) { - auto empty = gko::matrix::BatchDense::create(this->exec); + auto empty = gko::batch::matrix::BatchDense::create(this->exec); ASSERT_EQ(empty->get_const_values(), nullptr); } -TYPED_TEST(BatchDense, CanBeConstructedWithSize) +TYPED_TEST(BatchDense, CanGetValuesForEntry) { - using size_type = gko::size_type; - auto m = gko::matrix::BatchDense::create( - this->exec, - std::vector>{gko::dim<2>{2, 4}, gko::dim<2>{2, 3}}); + using value_type = typename TestFixture::value_type; - ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 4)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 3)); - EXPECT_EQ(m->get_stride().at(0), 4); - EXPECT_EQ(m->get_stride().at(1), 3); - ASSERT_EQ(m->get_num_stored_elements(), 14); - ASSERT_EQ(m->get_num_stored_elements(0), 8); - ASSERT_EQ(m->get_num_stored_elements(1), 6); + ASSERT_EQ(this->mtx->get_values_for_item(1)[0], value_type{1.0}); +} + + +TYPED_TEST(BatchDense, CanCreateDenseItemView) +{ + GKO_ASSERT_MTX_NEAR(this->mtx->create_view_for_item(1), this->dense_mtx, + 0.0); +} + + +TYPED_TEST(BatchDense, CanBeCopied) +{ + auto mtx_copy = + gko::batch::matrix::BatchDense::create(this->exec); + + mtx_copy->copy_from(this->mtx.get()); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->at(0, 0, 0) = 7; + this->mtx->at(0, 1) = 7; + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(BatchDense, CanBeMoved) +{ + auto mtx_copy = + gko::batch::matrix::BatchDense::create(this->exec); + + mtx_copy->copy_from(std::move(this->mtx)); + + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(BatchDense, CanBeCloned) +{ + auto mtx_clone = this->mtx->clone(); + + this->assert_equal_to_original_mtx( + 
dynamic_castmtx.get())>(mtx_clone.get())); } -TYPED_TEST(BatchDense, CanBeConstructedWithSizeAndStride) +TYPED_TEST(BatchDense, CanBeCleared) +{ + this->mtx->clear(); + + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(BatchDense, CanBeConstructedWithSize) { using size_type = gko::size_type; - auto m = gko::matrix::BatchDense::create( - this->exec, std::vector>{gko::dim<2>{2, 3}}, - std::vector{4}); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - EXPECT_EQ(m->get_stride().at(0), 4); - ASSERT_EQ(m->get_num_stored_elements(), 8); + auto m = gko::batch::matrix::BatchDense::create( + this->exec, gko::batch_dim<2>> {2, gko::dim<2>{5, 3}}); + + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 30); } @@ -152,23 +185,27 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) using size_type = gko::size_type; // clang-format off value_type data[] = { - 1.0, 2.0, -1.0, - 3.0, 4.0, -1.0, - 3.0, 5.0, 1.0, - 5.0, 6.0, -3.0}; + 1.0, 2.0, + -1.0, 3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; // clang-format on - auto m = gko::matrix::BatchDense::create( - this->exec, - std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, - gko::array::view(this->exec, 12, data), - std::vector{3, 3}); + auto m = gko::batch::matrix::BatchDense::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::view(this->exec, 8, data)); ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); - ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); - ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); } @@ -178,23 +215,27 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) using size_type = gko::size_type; // clang-format off const value_type data[] = { - 1.0, 2.0, -1.0, - 3.0, 4.0, -1.0, - 3.0, 5.0, 1.0, - 5.0, 6.0, -3.0}; + 1.0, 2.0, + -1.0, 3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; // clang-format on auto m = gko::matrix::BatchDense::create_const( - this->exec, - std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, - gko::array::const_view(this->exec, 12, data), - std::vector{3, 3}); + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::const_view(this->exec, 8, data)); ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); - ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); - ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); } @@ -203,20 +244,15 @@ TYPED_TEST(BatchDense, CanBeConstructedFromBatchDenseMatrices) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize( - 3, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + 
this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::matrix::BatchDense::create( + auto m = gko::batch::multivector::create_from_dense( this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::matrix::BatchDense::create( - this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), - mat2.get(), mat1.get(), mat2.get()}); - auto m2 = - gko::matrix::BatchDense::create(this->exec, 3, m.get()); - GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); + this->assert_equal_to_original_mtx(m.get()); } @@ -297,19 +333,6 @@ TYPED_TEST(BatchDense, CanBeListConstructed) } -TYPED_TEST(BatchDense, CanBeListConstructedWithstride) -{ - using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( - std::vector{2}, {{1.0, 2.0}}, this->exec); - ASSERT_EQ(m->get_num_batch_entries(), 1); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 4); - EXPECT_EQ(m->at(0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1), value_type{2.0}); -} - - TYPED_TEST(BatchDense, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; @@ -385,40 +408,6 @@ TYPED_TEST(BatchDense, CanBeDoubleListConstructedWithstride) } -TYPED_TEST(BatchDense, CanBeCopied) -{ - auto mtx_copy = gko::matrix::BatchDense::create(this->exec); - mtx_copy->copy_from(this->mtx.get()); - this->assert_equal_to_original_mtx(this->mtx.get()); - this->mtx->at(0, 0, 0) = 7; - this->mtx->at(0, 1) = 7; - this->assert_equal_to_original_mtx(mtx_copy.get()); -} - - -TYPED_TEST(BatchDense, CanBeMoved) -{ - auto mtx_copy = gko::matrix::BatchDense::create(this->exec); - mtx_copy->copy_from(std::move(this->mtx)); - this->assert_equal_to_original_mtx(mtx_copy.get()); -} - - -TYPED_TEST(BatchDense, CanBeCloned) -{ - auto mtx_clone = this->mtx->clone(); - this->assert_equal_to_original_mtx( - dynamic_castmtx.get())>(mtx_clone.get())); -} - - -TYPED_TEST(BatchDense, CanBeCleared) -{ - this->mtx->clear(); - this->assert_empty(this->mtx.get()); -} - - TYPED_TEST(BatchDense, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; @@ -515,6 +504,3 @@ TYPED_TEST(BatchDense, CanBeReadFromMatrixAssemblyData) EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); EXPECT_EQ(m->at(1, 1, 0), value_type{5.0}); } - - -} // namespace diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index d91274526d3..43f35e55f62 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -52,6 +52,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { namespace batch { +namespace matrix { + + +template +class BatchDense; + + +} + /** * MultiVector stores multiple vectors in a batched fashion and is useful @@ -81,21 +90,25 @@ class MultiVector : public EnablePolymorphicObject>, public EnablePolymorphicAssignment>, public EnableCreateMethod>, - public ConvertibleTo>> { + public ConvertibleTo>>, + public ConvertibleTo> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class MultiVector>; friend class MultiVector>; + friend class matrix::BatchDense; public: using EnablePolymorphicAssignment::convert_to; using EnablePolymorphicAssignment::move_to; using ConvertibleTo>>::convert_to; using ConvertibleTo>>::move_to; + using ConvertibleTo>::convert_to; + using ConvertibleTo>::move_to; using value_type = ValueType; using index_type = int32; - using unbatch_type = matrix::Dense; + using unbatch_type = gko::matrix::Dense; using absolute_type = remove_complex>; using complex_type = to_complex>; @@ -113,6 +126,10 @@ class MultiVector void move_to(MultiVector>* result) override; + void convert_to(matrix::BatchDense* result) const override; + + void move_to(matrix::BatchDense* result) override; + /** * Creates a mutable view (of matrix::Dense type) of one item of the Batch * MultiVector object. Does not perform any deep copies, but only returns a @@ -196,8 +213,8 @@ class MultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 47aff35b7e7..1b36cd64869 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -97,14 +97,7 @@ class BatchDense : public EnableBatchLinOp>, * @param other The other matrix whose configuration needs to copied. */ static std::unique_ptr create_with_config_of( - const BatchDense* other) - { - // De-referencing `other` before calling the functions (instead of - // using operator `->`) is currently required to be compatible with - // CUDA 10.1. - // Otherwise, it results in a compile error. - return (*other).create_with_same_config(); - } + ptr_param other); void convert_to( BatchDense>* result) const override; @@ -228,7 +221,7 @@ class BatchDense : public EnableBatchLinOp>, * array (if it resides on the same executor as the vector) or a copy of the * array on the correct executor. 
*/ - static std::unique_ptr> create_const( + static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values); From 989b17cd0ed518a75b5770d6f7a05385bcfce0b5 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 3 Oct 2023 14:09:59 +0200 Subject: [PATCH 346/583] Add tests for BatchDense core --- core/matrix/batch_dense.cpp | 7 - core/test/base/batch_multi_vector.cpp | 2 +- core/test/matrix/CMakeLists.txt | 3 +- core/test/matrix/batch_dense.cpp | 231 +++++++++------------ core/test/utils/assertions.hpp | 8 +- include/ginkgo/core/matrix/batch_dense.hpp | 93 +++++---- 6 files changed, 159 insertions(+), 185 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 9f72a26c488..f5d255d901c 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -127,13 +127,6 @@ BatchDense::create_with_config_of( } -template -void BatchDense::set_size(const batch_dim<2>& value) noexcept -{ - batch_size_ = value; -} - - template std::unique_ptr> BatchDense::create_with_same_config() const diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 7bdaec30b27..8390a6c4327 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -412,9 +412,9 @@ TYPED_TEST(MultiVector, CanBeReadFromMatrixData) auto m = gko::batch::read>(this->exec, vec_data); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 57c2c97e355..cca4b8da1c0 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -1,5 +1,4 @@ -# ginkgo_create_test(batch_dense) -# +ginkgo_create_test(batch_dense) ginkgo_create_test(coo) ginkgo_create_test(coo_builder) ginkgo_create_test(csr) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index a1ebdb1061c..f9210550bea 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -36,12 +36,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include #include +#include "core/base/batch_utilities.hpp" #include "core/test/utils.hpp" +#include "core/test/utils/batch_helpers.hpp" template @@ -63,11 +66,11 @@ class BatchDense : public ::testing::Test { static void assert_equal_to_original_mtx( - gko::matrix::BatchDense* m) + gko::batch::matrix::BatchDense* m) { - ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 4)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3)); EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); @@ -82,19 +85,26 @@ class BatchDense : public ::testing::Test { ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); } - static void assert_empty(gko::matrix::BatchDense* m) + static void assert_empty(gko::batch::matrix::BatchDense* m) { - ASSERT_EQ(m->get_num_batch_entries(), 0); + ASSERT_EQ(m->get_num_batch_items(), 0); ASSERT_EQ(m->get_num_stored_elements(), 0); } std::shared_ptr exec; std::unique_ptr> mtx; + std::unique_ptr> dense_mtx; }; TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); +TYPED_TEST(BatchDense, KnowsItsSizeAndValues) +{ + this->assert_equal_to_original_mtx(this->mtx.get()); +} + + TYPED_TEST(BatchDense, CanBeEmpty) { auto empty = gko::batch::matrix::BatchDense::create(this->exec); @@ -171,9 +181,9 @@ TYPED_TEST(BatchDense, CanBeConstructedWithSize) using size_type = gko::size_type; auto m = gko::batch::matrix::BatchDense::create( - this->exec, gko::batch_dim<2>> {2, gko::dim<2>{5, 3}}); + this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3})); - ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); ASSERT_EQ(m->get_num_stored_elements(), 30); } @@ -223,7 +233,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) 6.0, -3.0}; // clang-format on - auto m = gko::matrix::BatchDense::create_const( + auto m = gko::batch::matrix::BatchDense::create_const( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), gko::array::const_view(this->exec, 8, data)); @@ -239,17 +249,19 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) } -TYPED_TEST(BatchDense, CanBeConstructedFromBatchDenseMatrices) +TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::multivector::create_from_dense( + auto m = gko::batch::create_from_item< + gko::batch::matrix::BatchDense>( this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); @@ -261,34 +273,45 @@ TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; + auto mat1 = gko::initialize( 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::matrix::BatchDense::create( + auto bat_m = gko::batch::create_from_item< + gko::batch::matrix::BatchDense>( this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); - auto m = - 
gko::matrix::BatchDense::create(this->exec, 3, mat1.get()); + auto m = gko::batch::create_from_item< + gko::batch::matrix::BatchDense>(this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } -TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) +TYPED_TEST(BatchDense, CanBeConstructedByDuplicatingBatchDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize( - 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::matrix::BatchDense::create( + auto m = gko::batch::create_from_item< + gko::batch::matrix::BatchDense>( this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = gko::batch::create_from_item< + gko::batch::matrix::BatchDense>( + this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); - this->assert_equal_to_original_mtx(m.get()); + auto m2 = gko::batch::duplicate>( + this->exec, 3, m.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } @@ -302,30 +325,23 @@ TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto dense_mats = this->mtx->unbatch(); - + auto dense_mats = + gko::batch::unbatch>( + this->mtx.get()); GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); GKO_ASSERT_MTX_NEAR(dense_mats[1].get(), mat2.get(), 0.); } -TYPED_TEST(BatchDense, KnowsItsSizeAndValues) -{ - this->assert_equal_to_original_mtx(this->mtx.get()); -} - - TYPED_TEST(BatchDense, CanBeListConstructed) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch::initialize>( {{1.0, 2.0}, {1.0, 3.0}}, this->exec); - ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 4); + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); EXPECT_EQ(m->at(0, 0), value_type{1}); EXPECT_EQ(m->at(0, 1), value_type{2}); EXPECT_EQ(m->at(1, 0), value_type{1}); @@ -336,12 +352,12 @@ TYPED_TEST(BatchDense, CanBeListConstructed) TYPED_TEST(BatchDense, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + + auto m = gko::batch::initialize>( 2, I({1.0, 2.0}), this->exec); - ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 4); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); @@ -353,18 +369,13 @@ TYPED_TEST(BatchDense, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; using T = value_type; - auto m = gko::batch_initialize>( + + auto m = gko::batch::initialize>( {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, - {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, + {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, this->exec); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); - 
ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); - ASSERT_EQ(m->get_stride().at(0), 3); - ASSERT_EQ(m->get_stride().at(1), 2); - EXPECT_EQ(m->get_num_stored_elements(), 15); - ASSERT_EQ(m->get_num_stored_elements(0), 9); - ASSERT_EQ(m->get_num_stored_elements(1), 6); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 3)); EXPECT_EQ(m->at(0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 1), value_type{1.0}); EXPECT_EQ(m->at(0, 2), value_type{0.0}); @@ -372,72 +383,58 @@ TYPED_TEST(BatchDense, CanBeDoubleListConstructed) EXPECT_EQ(m->at(0, 4), value_type{4.0}); EXPECT_EQ(m->at(1, 0), value_type{1.0}); EXPECT_EQ(m->at(1, 1), value_type{2.0}); - EXPECT_EQ(m->at(1, 2), value_type{3.0}); - ASSERT_EQ(m->at(1, 3), value_type{4.0}); - EXPECT_EQ(m->at(1, 4), value_type{5.0}); + EXPECT_EQ(m->at(1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 3), value_type{3.0}); + EXPECT_EQ(m->at(1, 4), value_type{4.0}); } -TYPED_TEST(BatchDense, CanBeDoubleListConstructedWithstride) +TYPED_TEST(BatchDense, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; - using T = value_type; - auto m = gko::batch_initialize>( - {4, 3}, - {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, - {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, - this->exec); + using index_type = int; - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); - ASSERT_EQ(m->get_stride().at(0), 4); - ASSERT_EQ(m->get_stride().at(1), 3); - EXPECT_EQ(m->get_num_stored_elements(), 21); - ASSERT_EQ(m->get_num_stored_elements(0), 12); - ASSERT_EQ(m->get_num_stored_elements(1), 9); - EXPECT_EQ(m->at(0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1), value_type{1.0}); - EXPECT_EQ(m->at(0, 2), value_type{0.0}); - ASSERT_EQ(m->at(0, 3), value_type{2.0}); - EXPECT_EQ(m->at(0, 4), value_type{4.0}); - EXPECT_EQ(m->at(1, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 1), value_type{2.0}); - EXPECT_EQ(m->at(1, 2), value_type{3.0}); - ASSERT_EQ(m->at(1, 3), value_type{4.0}); - EXPECT_EQ(m->at(1, 4), value_type{5.0}); + auto vec_data = std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 0, 0.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); + + auto m = gko::batch::read>( + this->exec, vec_data); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); } -TYPED_TEST(BatchDense, CanBeReadFromMatrixData) +TYPED_TEST(BatchDense, CanBeReadFromSparseMatrixData) { using value_type = typename TestFixture::value_type; - auto m = gko::matrix::BatchDense::create(this->exec); - // clang-format off - m->read({gko::matrix_data{{2, 3}, - {{0, 0, 1.0}, - {0, 1, 3.0}, - {0, 2, 2.0}, - {1, 0, 0.0}, - {1, 1, 5.0}, - {1, 2, 0.0}}}, - gko::matrix_data{{2, 2}, - {{0, 0, -1.0}, - {0, 1, 0.5}, - {1, 0, 0.0}, - {1, 1, 9.0}}}}); - // clang-format on - - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 2)); - ASSERT_EQ(m->get_num_stored_elements(), 10); - ASSERT_EQ(m->get_num_stored_elements(0), 6); - ASSERT_EQ(m->get_num_stored_elements(1), 4); + using index_type = int; + auto vec_data = 
std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); + + auto m = gko::batch::read>( + this->exec, vec_data); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); - EXPECT_EQ(m->at(0, 1, 2), value_type{0.0}); EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); @@ -448,10 +445,12 @@ TYPED_TEST(BatchDense, CanBeReadFromMatrixData) TYPED_TEST(BatchDense, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; + using index_type = int; using tpl = typename gko::matrix_data::nonzero_type; - std::vector> data; - this->mtx->write(data); + auto data = gko::batch::write>( + this->mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); ASSERT_EQ(data[0].nonzeros.size(), 6); @@ -470,37 +469,3 @@ TYPED_TEST(BatchDense, GeneratesCorrectMatrixData) EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0})); EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0})); } - - -TYPED_TEST(BatchDense, CanBeReadFromMatrixAssemblyData) -{ - using value_type = typename TestFixture::value_type; - auto m = gko::matrix::BatchDense::create(this->exec); - gko::matrix_assembly_data data1(gko::dim<2>{2, 3}); - data1.set_value(0, 0, 1.0); - data1.set_value(0, 1, 3.0); - data1.set_value(0, 2, 2.0); - data1.set_value(1, 0, 0.0); - data1.set_value(1, 1, 5.0); - data1.set_value(1, 2, 0.0); - gko::matrix_assembly_data data2(gko::dim<2>{2, 1}); - data2.set_value(0, 0, 2.0); - data2.set_value(1, 0, 5.0); - auto data = std::vector>{data1, data2}; - - m->read(data); - - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 8); - ASSERT_EQ(m->get_num_stored_elements(0), 6); - ASSERT_EQ(m->get_num_stored_elements(1), 2); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); - EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{0.0}); - EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); - EXPECT_EQ(m->at(1, 1, 0), value_type{5.0}); -} diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index 63ed1e5423a..40034883078 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -720,12 +720,8 @@ ::testing::AssertionResult batch_matrices_near( using value_type1 = typename Mat1::value_type; using value_type2 = typename Mat2::value_type; - auto first_data = - gko::batch::write>(first); - auto second_data = - gko::batch::write>(second); + auto first_data = gko::batch::write(first); + auto second_data = gko::batch::write(second); if (first_data.size() != second_data.size()) { return ::testing::AssertionFailure() diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 1b36cd64869..55a1791a2a5 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -124,46 +124,75 @@ class BatchDense : public EnableBatchLinOp>, size_type 
item_id) const; /** - * Returns the batch size. + * Returns a pointer to the array of values of the multi-vector * - * @return the batch size + * @return the pointer to the array of values */ - batch_dim<2> get_size() const { return batch_size_; } + value_type* get_values() noexcept { return values_.get_data(); } /** - * Returns the number of batch items. + * @copydoc get_values() * - * @return the number of batch items + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. */ - size_type get_num_batch_items() const + const value_type* get_const_values() const noexcept { - return batch_size_.get_num_batch_items(); + return values_.get_const_data(); } /** - * Returns the common size of the batch items. + * Returns a single element for a particular batch item. + * + * @param batch_id the batch item index to be queried + * @param row the row of the requested element + * @param col the column of the requested element * - * @return the common size stored + * @note the method has to be called on the same Executor the vector is + * stored at (e.g. trying to call this method on a GPU multi-vector + * from the OMP results in a runtime error) */ - dim<2> get_common_size() const { return batch_size_.get_common_size(); } + value_type& at(size_type batch_id, size_type row, size_type col) + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data()[linearize_index(batch_id, row, col)]; + } /** - * Returns a pointer to the array of values of the multi-vector - * - * @return the pointer to the array of values + * @copydoc MultiVector::at(size_type, size_type, size_type) */ - value_type* get_values() noexcept { return values_.get_data(); } + value_type at(size_type batch_id, size_type row, size_type col) const + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data()[linearize_index(batch_id, row, col)]; + } /** - * @copydoc get_values() + * Returns a single element for a particular batch item. * - * @note This is the constant version of the function, which can be - * significantly more memory efficient than the non-constant version, - * so always prefer this version. + * Useful for iterating across all elements of the vector. + * However, it is less efficient than the two-parameter variant of this + * method. + * + * @param batch_id the batch item index to be queried + * @param idx a linear index of the requested element + * + * @note the method has to be called on the same Executor the vector is + * stored at (e.g. 
trying to call this method on a GPU multi-vector + * from the OMP results in a runtime error) */ - const value_type* get_const_values() const noexcept + ValueType& at(size_type batch_id, size_type idx) noexcept { - return values_.get_const_data(); + return values_.get_data()[linearize_index(batch_id, idx)]; + } + + /** + * @copydoc MultiVector::at(size_type, size_type, size_type) + */ + ValueType at(size_type batch_id, size_type idx) const noexcept + { + return values_.get_const_data()[linearize_index(batch_id, idx)]; } /** @@ -225,12 +254,6 @@ class BatchDense : public EnableBatchLinOp>, std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values); -private: - inline size_type compute_num_elems(const batch_dim<2>& size) - { - return size.get_cumulative_offset(size.get_num_batch_items()); - } - void apply(const MultiVector* b, MultiVector* x) const @@ -246,14 +269,13 @@ class BatchDense : public EnableBatchLinOp>, this->apply_impl(alpha, b, beta, x); } -protected: - /** - * Sets the size of the MultiVector. - * - * @param value the new size of the operator - */ - void set_size(const batch_dim<2>& value) noexcept; +private: + inline size_type compute_num_elems(const batch_dim<2>& size) + { + return size.get_cumulative_offset(size.get_num_batch_items()); + } +protected: /** * Creates an uninitialized BatchDense matrix of the specified size. * @@ -310,8 +332,8 @@ class BatchDense : public EnableBatchLinOp>, size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept { - return batch_size_.get_cumulative_offset(batch) + - row * batch_size_.get_common_size()[1] + col; + return this->get_size().get_cumulative_offset(batch) + + row * this->get_size().get_common_size()[1] + col; } size_type linearize_index(size_type batch, size_type idx) const noexcept @@ -321,7 +343,6 @@ class BatchDense : public EnableBatchLinOp>, } private: - batch_dim<2> batch_size_; array values_; }; From 12c652056dfcf0f49f0a66e78456afe3dfca6f00 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 3 Oct 2023 16:18:47 +0200 Subject: [PATCH 347/583] Add reference kernel tests --- core/matrix/batch_dense.cpp | 18 +- reference/test/matrix/CMakeLists.txt | 1 + reference/test/matrix/batch_dense_kernels.cpp | 219 ++++++++++++++++++ 3 files changed, 230 insertions(+), 8 deletions(-) create mode 100644 reference/test/matrix/batch_dense_kernels.cpp diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index f5d255d901c..c9da010c228 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -168,11 +168,12 @@ template void BatchDense::apply_impl(const MultiVector* b, MultiVector* x) const { - GKO_ASSERT_EQUAL_DIMENSIONS(b->get_common_size(), x->get_common_size()); GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - GKO_ASSERT_CONFORMANT(this->get_common_size(), x->get_common_size()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); this->get_executor()->run(dense::make_simple_apply(this, b, x)); } @@ -183,13 +184,14 @@ void BatchDense::apply_impl(const MultiVector* alpha, const MultiVector* beta, MultiVector* x) const { - GKO_ASSERT_EQUAL_DIMENSIONS(b->get_common_size(), x->get_common_size()); 
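    // Shape contract encoded by the assertions in this hunk (per batch item):
    // if this matrix is n x k, then b must be k x r and x must be n x r,
    // while alpha and beta must be 1 x 1 scaling factors.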
GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - GKO_ASSERT_CONFORMANT(this->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_COLS(alpha->get_common_size(), gko::dim<2>(1, 1)); - GKO_ASSERT_EQUAL_COLS(beta->get_common_size(), gko::dim<2>(1, 1)); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), gko::dim<2>(1, 1)); + GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); this->get_executor()->run( dense::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 9670a5df80c..18634de662d 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_test(batch_dense_kernels) ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..7bf11ba70f9 --- /dev/null +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -0,0 +1,219 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_dense_kernels.hpp" +#include "core/test/utils.hpp" + + +template +class BatchDense : public ::testing::Test { +protected: + using value_type = T; + using size_type = gko::size_type; + using Mtx = gko::batch::matrix::BatchDense; + using MVec = gko::batch::MultiVector; + using DenseMtx = gko::matrix::Dense; + using ComplexMtx = gko::to_complex; + using RealMtx = gko::remove_complex; + BatchDense() + : exec(gko::ReferenceExecutor::create()), + mtx_0(gko::batch::initialize( + {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, + {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, + exec)), + mtx_00(gko::initialize( + {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), + mtx_01(gko::initialize( + {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), + b_0(gko::batch::initialize( + {{I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}}, + exec)), + b_00(gko::initialize( + {I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + b_01(gko::initialize( + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + x_0(gko::batch::initialize( + {{I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}}, + exec)), + x_00(gko::initialize( + {I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, exec)), + x_01(gko::initialize( + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}, exec)) + {} + + std::shared_ptr exec; + std::unique_ptr mtx_0; + std::unique_ptr mtx_00; + std::unique_ptr mtx_01; + std::unique_ptr b_0; + std::unique_ptr b_00; + std::unique_ptr b_01; + std::unique_ptr x_0; + std::unique_ptr x_00; + std::unique_ptr x_01; + + std::ranlux48 rand_engine; +}; + + +TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); + + +TYPED_TEST(BatchDense, AppliesToBatchMultiVector) +{ + using T = typename TestFixture::value_type; + + this->mtx_0->apply(this->b_0.get(), this->x_0.get()); + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); + this->mtx_01->apply(this->b_01.get(), this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchMultiVector) +{ + using Mtx = typename TestFixture::Mtx; + using MVec = typename TestFixture::MVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({-1.0}, this->exec); + auto beta0 = gko::initialize({2.5}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), + this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultCols) +{ + using MVec = typename 
TestFixture::MVec; + auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultRows) +{ + using MVec = typename TestFixture::MVec; + auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, ApplyFailsOnWrongInnerDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + + ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, AdvancedApplyFailsOnWrongInnerDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + auto alpha = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + auto beta = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, AdvancedApplyFailsOnWrongAlphaDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); + auto alpha = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); + auto beta = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} From 1b14a262935a267e30a8d24017786e8376a9e5da Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 3 Oct 2023 17:56:16 +0200 Subject: [PATCH 348/583] Add OMP tests and fix kernel --- core/matrix/batch_struct.hpp | 14 +- cuda/matrix/batch_dense_kernels.cu | 2 +- hip/matrix/batch_dense_kernels.hip.cpp | 2 +- omp/matrix/batch_dense_kernels.cpp | 38 ++---- reference/matrix/batch_dense_kernels.cpp | 32 ++--- reference/matrix/batch_dense_kernels.hpp.inc | 4 +- reference/test/matrix/batch_dense_kernels.cpp | 27 ++++ test/matrix/CMakeLists.txt | 1 + test/matrix/batch_dense_kernels.cpp | 129 ++++++++++++++++++ 9 files changed, 188 insertions(+), 61 deletions(-) create mode 100644 test/matrix/batch_dense_kernels.cpp diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index b6926b0894d..37c297bb6b5 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -54,7 +54,7 @@ struct batch_item { ValueType* values; int stride; int num_rows; - int num_rhs; + int num_cols; }; @@ -70,7 +70,7 @@ struct uniform_batch { size_type num_batch_items; int stride; int num_rows; - int num_rhs; + int num_cols; size_type get_entry_storage() const { @@ -86,7 +86,7 @@ template GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item to_const( const batch_dense::batch_item& b) { - return {b.values, b.stride, b.num_rows, b.num_rhs}; + return {b.values, b.stride, b.num_rows, b.num_cols}; } @@ -94,7 +94,7 @@ template GKO_ATTRIBUTES GKO_INLINE batch_dense::uniform_batch to_const( const batch_dense::uniform_batch& ub) { - return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_rhs}; + return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_cols}; } @@ -104,16 +104,16 @@ GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( const size_type batch_idx) { 
return {batch.values + batch_idx * batch.stride * batch.num_rows, - batch.stride, batch.num_rows, batch.num_rhs}; + batch.stride, batch.num_rows, batch.num_cols}; } template GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( ValueType* const batch_values, const int stride, const int num_rows, - const int num_rhs, const size_type batch_idx) + const int num_cols, const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, - num_rhs}; + num_cols}; } diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index 4615af581f5..c0a172fd026 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -45,7 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/components/reduction.cuh" #include "cuda/components/thread_ids.cuh" #include "cuda/components/uninitialized_array.hpp" -#include "cuda/matrix/batch_struct.hpp" +// #include "cuda/matrix/batch_struct.hip.hpp" namespace gko { diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index 93570388d50..06f0caf81ec 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -48,7 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hip/components/reduction.hip.hpp" #include "hip/components/thread_ids.hip.hpp" #include "hip/components/uninitialized_array.hip.hpp" -#include "hip/matrix/batch_struct.hip.hpp" +// #include "hip/matrix/batch_struct.hip.hpp" namespace gko { diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp index fe742bee402..a767215c844 100644 --- a/omp/matrix/batch_dense_kernels.cpp +++ b/omp/matrix/batch_dense_kernels.cpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
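// Usage sketch for the batched apply that these host kernels back (types and
// sizes are illustrative only; it mirrors the tests added in this patch series):
//
//   auto A = gko::batch::matrix::BatchDense<double>::create(exec, mat_size);
//   auto b = gko::batch::MultiVector<double>::create(exec, vec_size);
//   auto x = gko::batch::MultiVector<double>::create(exec, vec_size);
//   A->apply(b.get(), x.get());  // dispatches to simple_apply, one item per loop iteration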
#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" #include "reference/matrix/batch_struct.hpp" @@ -70,7 +71,7 @@ void simple_apply(std::shared_ptr exec, const auto mat_ub = host::get_batch_struct(mat); #pragma omp parallel for for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); const auto b_item = batch::extract_batch_item(b_ub, batch); const auto x_item = batch::extract_batch_item(x_ub, batch); simple_apply_kernel(mat_item, b_item, x_item); @@ -84,40 +85,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* a, + const batch::matrix::BatchDense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, - batch::MultiVector* c) + batch::MultiVector* x) { const auto b_ub = host::get_batch_struct(b); const auto x_ub = host::get_batch_struct(x); const auto mat_ub = host::get_batch_struct(mat); const auto alpha_ub = host::get_batch_struct(alpha); const auto beta_ub = host::get_batch_struct(beta); - if (alpha->get_num_batch_items() > 1) { - GKO_ASSERT(alpha->get_num_batch_items() == x->get_num_batch_items()); - GKO_ASSERT(beta->get_num_batch_items() == x->get_num_batch_items()); #pragma omp parallel for - for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); - const auto b_item = batch::extract_batch_item(b_ub, batch); - const auto x_item = batch::extract_batch_item(x_ub, batch); - const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); - const auto beta_item = batch::extract_batch_item(beta_ub, batch); - advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, - beta_item.values[0], x_item); - } - } else { - const auto alpha_item = batch::extract_batch_item(alpha_ub, 0); - const auto beta_item = batch::extract_batch_item(beta_ub, 0); -#pragma omp parallel for - for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); - const auto b_item = batch::extract_batch_item(b_ub, batch); - const auto x_item = batch::extract_batch_item(x_ub, batch); - advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, - beta_item.values[0], x_item); - } + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); } } diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp index bb5f3e18df7..f42d9a81d1f 100644 --- a/reference/matrix/batch_dense_kernels.cpp +++ b/reference/matrix/batch_dense_kernels.cpp @@ -95,30 +95,14 @@ void advanced_apply(std::shared_ptr exec, const auto mat_ub = host::get_batch_struct(mat); const auto alpha_ub = host::get_batch_struct(alpha); const auto beta_ub = host::get_batch_struct(beta); - if (alpha->get_num_batch_items() > 1) { - GKO_ASSERT(alpha->get_num_batch_items() == 
x->get_num_batch_items()); - GKO_ASSERT(beta->get_num_batch_items() == x->get_num_batch_items()); - for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = - batch::matrix::extract_batch_item(mat_ub, batch); - const auto b_item = batch::extract_batch_item(b_ub, batch); - const auto x_item = batch::extract_batch_item(x_ub, batch); - const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); - const auto beta_item = batch::extract_batch_item(beta_ub, batch); - advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, - beta_item.values[0], x_item); - } - } else { - const auto alpha_item = batch::extract_batch_item(alpha_ub, 0); - const auto beta_item = batch::extract_batch_item(beta_ub, 0); - for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = - batch::matrix::extract_batch_item(mat_ub, batch); - const auto b_item = batch::extract_batch_item(b_ub, batch); - const auto x_item = batch::extract_batch_item(x_ub, batch); - advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, - beta_item.values[0], x_item); - } + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); } } diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc index d45183b2faa..bff9ad137cf 100644 --- a/reference/matrix/batch_dense_kernels.hpp.inc +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -43,7 +43,7 @@ inline void simple_apply_kernel( } for (int row = 0; row < c.num_rows; ++row) { - for (int inner = 0; inner < a.num_rhs; ++inner) { + for (int inner = 0; inner < a.num_cols; ++inner) { for (int col = 0; col < c.num_rhs; ++col) { c.values[row * c.stride + col] += a.values[row * a.stride + inner] * @@ -77,7 +77,7 @@ inline void advanced_apply_kernel( } for (int row = 0; row < c.num_rows; ++row) { - for (int inner = 0; inner < a.num_rhs; ++inner) { + for (int inner = 0; inner < a.num_cols; ++inner) { for (int col = 0; col < c.num_rhs; ++col) { c.values[row * c.stride + col] += alpha * a.values[row * a.stride + inner] * diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp index 7bf11ba70f9..8e2e522e5f4 100644 --- a/reference/test/matrix/batch_dense_kernels.cpp +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -129,6 +129,33 @@ TYPED_TEST(BatchDense, AppliesToBatchMultiVector) } +TYPED_TEST(BatchDense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) +{ + using Mtx = typename TestFixture::Mtx; + using MVec = typename TestFixture::MVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize(2, {1.5}, this->exec); + auto beta = gko::batch::initialize(2, {-4.0}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({1.5}, this->exec); + auto beta0 = gko::initialize({-4.0}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), + this->x_0.get()); + 
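    // Reference results: apply the same alpha/beta to each batch item through
    // the unbatched Dense matrices, then compare with the batched result.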
this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchMultiVector) { using Mtx = typename TestFixture::Mtx; diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index a9cf267a3c8..91987f3717f 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_common_test(batch_dense_kernels DISABLE_EXECUTORS dpcpp hip cuda) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..60ef4d61a95 --- /dev/null +++ b/test/matrix/batch_dense_kernels.cpp @@ -0,0 +1,129 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include +#include +#include + + +#include "core/base/batch_utilities.hpp" +#include "core/matrix/batch_dense_kernels.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/assertions.hpp" +#include "core/test/utils/batch_helpers.hpp" +#include "test/utils/executor.hpp" + + +class BatchDense : public CommonTestFixture { +protected: + using vtype = double; + using Mtx = gko::batch::matrix::BatchDense; + using MVec = gko::batch::MultiVector; + + BatchDense() : rand_engine(15) {} + + template + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) + { + return gko::test::generate_random_batch_matrix( + num_batch_items, num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(gko::size_type num_vecs = 1) + { + const int num_rows = 252; + const int num_cols = 32; + x = gen_mtx(batch_size, num_rows, num_cols); + y = gen_mtx(batch_size, num_cols, num_vecs); + alpha = gen_mtx(batch_size, 1, 1); + beta = gen_mtx(batch_size, 1, 1); + dx = gko::clone(exec, x); + dy = gko::clone(exec, y); + dalpha = gko::clone(exec, alpha); + dbeta = gko::clone(exec, beta); + expected = MVec::create( + ref, + gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); + expected->fill(gko::one()); + dresult = gko::clone(exec, expected); + } + + std::ranlux48 rand_engine; + + const size_t batch_size = 11; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr dresult; + std::unique_ptr dx; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(BatchDense, SingleVectorApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + x->apply(y.get(), expected.get()); + dx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(BatchDense, SingleVectorAdvancedApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + x->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); +} From 3273d69a756dde47447e59fb20040f85c9efe581 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Wed, 4 Oct 2023 09:41:28 +0000 Subject: [PATCH 349/583] Format files Co-authored-by: Pratik Nayak --- include/ginkgo/core/base/batch_multi_vector.hpp | 4 ++-- include/ginkgo/core/matrix/batch_dense.hpp | 4 ++-- include/ginkgo/ginkgo.hpp | 1 + test/matrix/batch_dense_kernels.cpp | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 43f35e55f62..6b3b207c76c 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -213,8 +213,8 @@ class MultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 55a1791a2a5..0457f444c5a 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -217,8 +217,8 @@ class BatchDense : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index aed3b5f3572..8bb29242e88 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -108,6 +108,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index 60ef4d61a95..7d44f29899c 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include "core/matrix/batch_dense_kernels.hpp" #include @@ -43,10 +43,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include "core/base/batch_utilities.hpp" -#include "core/matrix/batch_dense_kernels.hpp" #include "core/test/utils.hpp" #include "core/test/utils/assertions.hpp" #include "core/test/utils/batch_helpers.hpp" From 579b3e702c26e0f58d6f0dddfab7d42ffd171a3f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 4 Oct 2023 11:49:01 +0200 Subject: [PATCH 350/583] circ dep and typo fixes --- core/test/matrix/batch_dense.cpp | 2 +- reference/matrix/batch_struct.hpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index f9210550bea..02788e14b7d 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -153,7 +153,7 @@ TYPED_TEST(BatchDense, CanBeMoved) auto mtx_copy = gko::batch::matrix::BatchDense::create(this->exec); - mtx_copy->copy_from(std::move(this->mtx)); + this->mtx->move_to(mtx_copy); this->assert_equal_to_original_mtx(mtx_copy.get()); } diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 1bed5a4e5c9..dee7c71948a 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -36,9 +36,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
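// The includes added in this hunk appear to be the circular-dependency fix
// that the commit subject ("circ dep and typo fixes") refers to.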
#include #include +#include #include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" namespace gko { From e97d62824cc9eb85be7aa65751d7118e38d810c3 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 4 Oct 2023 15:04:36 +0200 Subject: [PATCH 351/583] Add CUDA, HIP kernels and tests Co-authored-by: Aditya Kashi Co-authored-by: Isha Aggarwal --- .../batch_dense_kernel_launcher.hpp.inc | 78 ++++++++ .../matrix/batch_dense_kernels.hpp.inc | 170 ++++++++++++++++++ cuda/base/batch_multi_vector_kernels.cu | 1 + cuda/matrix/batch_dense_kernels.cu | 26 +-- cuda/matrix/batch_struct.hpp | 96 ++++++++++ hip/matrix/batch_dense_kernels.hip.cpp | 28 +-- hip/matrix/batch_struct.hip.hpp | 96 ++++++++++ test/matrix/CMakeLists.txt | 2 +- 8 files changed, 459 insertions(+), 38 deletions(-) create mode 100644 common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc create mode 100644 common/cuda_hip/matrix/batch_dense_kernels.hpp.inc create mode 100644 cuda/matrix/batch_struct.hpp create mode 100644 hip/matrix/batch_struct.hip.hpp diff --git a/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc new file mode 100644 index 00000000000..668b0278680 --- /dev/null +++ b/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + simple_apply_kernel<<get_stream()>>>(mat_ub, b_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + advanced_apply_kernel<<get_stream()>>>(alpha_ub, mat_ub, b_ub, + beta_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); diff --git a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc new file mode 100644 index 00000000000..43046166abc --- /dev/null +++ b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc @@ -0,0 +1,170 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +template +__device__ __forceinline__ void simple_apply( + const gko::batch::matrix::batch_dense::batch_item& mat, + const ValueType* const __restrict__ b, ValueType* const __restrict__ x) +{ + constexpr auto tile_size = config::warp_size; + + auto thread_block = group::this_thread_block(); + auto subwarp_grp = group::tiled_partition(thread_block); + const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); + const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + + for (int row = subwarp_grp_id; row < mat.num_rows; + row += num_subwarp_grps_per_block) { + ValueType temp = zero(); + for (int j = subwarp_grp.thread_rank(); j < mat.num_cols; + j += subwarp_grp.size()) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += val * b[j]; + } + +#pragma unroll + for (int i = static_cast(tile_size) / 2; i > 0; i /= 2) { + temp += subwarp_grp.shfl_down(temp, i); + } + + if (subwarp_grp.thread_rank() == 0) { + x[row] = temp; + } + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: + batch_dense:: + uniform_batch< + const ValueType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + simple_apply(mat_b, b_b.values, x_b.values); + } +} + + +template +__device__ __forceinline__ void advanced_apply( + const ValueType alpha, + const gko::batch::matrix::batch_dense::batch_item& mat, + const ValueType* const __restrict__ b, const ValueType beta, + ValueType* const __restrict__ x) +{ + constexpr auto tile_size = config::warp_size; + + auto thread_block = group::this_thread_block(); + auto subwarp_grp = group::tiled_partition(thread_block); + const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); + const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + + for (int row = subwarp_grp_id; row < mat.num_rows; + row += num_subwarp_grps_per_block) { + ValueType temp = zero(); + for (int j = subwarp_grp.thread_rank(); j < mat.num_cols; + j += subwarp_grp.size()) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += alpha * val * b[j]; + } + +#pragma unroll + for (int i = static_cast(tile_size) / 2; i > 0; i /= 2) { + temp += subwarp_grp.shfl_down(temp, i); + } + + if (subwarp_grp.thread_rank() == 0) { + x[row] = temp + beta * x[row]; + } + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void advanced_apply_kernel(const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + alpha, + const gko::batch::matrix:: + batch_dense:: + uniform_batch< + const ValueType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + beta, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = 
gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id); + const auto beta_b = gko::batch::extract_batch_item(beta, batch_id); + advanced_apply(alpha_b.values[0], mat_b, b_b.values, beta_b.values[0], + x_b.values); + } +} diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index 7729d006b75..5c4d1f5bdc5 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -78,6 +78,7 @@ constexpr int sm_oversubscription = 4; // clang-format on + } // namespace batch_multi_vector } // namespace cuda } // namespace kernels diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index c0a172fd026..9d9cfcf6c8e 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" +#include "cuda/base/batch_struct.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" #include "cuda/base/pointer_mode_guard.hpp" @@ -45,7 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/components/reduction.cuh" #include "cuda/components/thread_ids.cuh" #include "cuda/components/uninitialized_array.hpp" -// #include "cuda/matrix/batch_struct.hip.hpp" +#include "cuda/matrix/batch_struct.hpp" namespace gko { @@ -60,29 +61,18 @@ namespace batch_dense { constexpr auto default_block_size = 256; -constexpr int sm_multiplier = 4; +constexpr int sm_oversubscription = 4; +// clang-format off -template -void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, - const batch::MultiVector* b, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); +#include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" -template -void advanced_apply(std::shared_ptr exec, - const batch::MultiVector* alpha, - const batch::matrix::BatchDense* a, - const batch::MultiVector* b, - const batch::MultiVector* beta, - batch::MultiVector* c) GKO_NOT_IMPLEMENTED; +#include "common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc" -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); +// clang-format on } // namespace batch_dense diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp new file mode 100644 index 00000000000..202eb91a366 --- /dev/null +++ b/cuda/matrix/batch_struct.hpp @@ -0,0 +1,96 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the required CUDA scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch> +get_batch_struct(const batch::matrix::BatchDense* const op) +{ + return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch> +get_batch_struct(batch::matrix::BatchDense* const op) +{ + return {as_cuda_type(op->get_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace cuda +} // namespace kernels +} // namespace gko + + +#endif // GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index 06f0caf81ec..51f2237826b 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -40,7 +40,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" +#include "hip/base/batch_struct.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipblas_bindings.hip.hpp" #include "hip/base/pointer_mode_guard.hip.hpp" @@ -48,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
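// From this commit on, the HIP backend shares its batch dense kernels with
// CUDA: the GKO_NOT_IMPLEMENTED stubs are replaced by including the common
// device kernels and launchers from common/cuda_hip (see the hunks below).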
#include "hip/components/reduction.hip.hpp" #include "hip/components/thread_ids.hip.hpp" #include "hip/components/uninitialized_array.hip.hpp" -// #include "hip/matrix/batch_struct.hip.hpp" +#include "hip/matrix/batch_struct.hip.hpp" namespace gko { @@ -63,30 +65,18 @@ namespace batch_dense { constexpr auto default_block_size = 256; -constexpr int sm_multiplier = 4; +constexpr int sm_oversubscription = 4; +// clang-format off -template -void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, - const batch::MultiVector* b, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); +#include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" -template -void advanced_apply(std::shared_ptr exec, - const batch::MultiVector* alpha, - const batch::matrix::BatchDense* a, - const batch::MultiVector* b, - const batch::MultiVector* beta, - batch::MultiVector* c) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); +#include "common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc" +// clang-format on } // namespace batch_dense } // namespace hip diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp new file mode 100644 index 00000000000..0d5dfb46a1b --- /dev/null +++ b/hip/matrix/batch_struct.hip.hpp @@ -0,0 +1,96 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "hip/base/config.hpp" +#include "hip/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace hip { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the required HIP scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch> +get_batch_struct(const batch::matrix::BatchDense* const op) +{ + return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch> +get_batch_struct(batch::matrix::BatchDense* const op) +{ + return {as_hip_type(op->get_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace hip +} // namespace kernels +} // namespace gko + + +#endif // GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index 91987f3717f..a89abfb4ae4 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,4 +1,4 @@ -ginkgo_create_common_test(batch_dense_kernels DISABLE_EXECUTORS dpcpp hip cuda) +ginkgo_create_common_test(batch_dense_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) From 0d0b1191039424f0a3b92419e855f9e2a162b28f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 4 Oct 2023 15:36:19 +0200 Subject: [PATCH 352/583] Add SYCL kernels and tests WIP Co-authored-by: Phuong Nguyen --- .../matrix/batch_dense_kernels.hpp.inc | 12 +-- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 55 +++++------ dpcpp/matrix/batch_dense_kernels.dp.cpp | 99 ++++++++++++++++++- dpcpp/matrix/batch_dense_kernels.hpp.inc | 91 +++++++++++++++++ dpcpp/matrix/batch_struct.hpp | 94 ++++++++++++++++++ test/matrix/CMakeLists.txt | 2 +- 6 files changed, 311 insertions(+), 42 deletions(-) create mode 100644 dpcpp/matrix/batch_dense_kernels.hpp.inc create mode 100644 dpcpp/matrix/batch_struct.hpp diff --git a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc index 43046166abc..6cae08eadb5 100644 --- a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc @@ -52,10 +52,8 @@ __device__ __forceinline__ void simple_apply( temp += val * b[j]; } -#pragma unroll - for (int i = static_cast(tile_size) / 2; i > 0; i /= 2) { - temp += subwarp_grp.shfl_down(temp, i); - } + // subgroup level reduction + temp = reduce(subgroup, temp, thrust::plus{}); if (subwarp_grp.thread_rank() == 0) { x[row] = temp; @@ -116,10 +114,8 @@ __device__ __forceinline__ void advanced_apply( temp += alpha * val * b[j]; } -#pragma unroll - for (int i = 
static_cast(tile_size) / 2; i > 0; i /= 2) { - temp += subwarp_grp.shfl_down(temp, i); - } + // subgroup level reduction + temp = reduce(subgroup, temp, thrust::plus{}); if (subwarp_grp.thread_rank() == 0) { x[row] = temp + beta * x[row]; diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 10e47ba080e..12648b81e00 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -37,11 +37,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include -#include +#include "core/base/batch_struct.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "dpcpp/base/batch_struct.hpp" #include "dpcpp/base/config.hpp" @@ -193,9 +194,9 @@ void compute_dot(std::shared_ptr exec, // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto x_b = batch::extract_batch_item(x_ub, group_id); @@ -231,19 +232,18 @@ void compute_conj_dot(std::shared_ptr exec, exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto y_b = batch::extract_batch_item(y_ub, group_id); - const auto res_b = - batch::extract_batch_item(res_ub, group_id); - compute_gen_dot_product_kernel( - x_b, y_b, res_b, item_ct1, - [](auto val) { return conj(val); }); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto y_b = batch::extract_batch_item(y_ub, group_id); + const auto res_b = batch::extract_batch_item(res_ub, group_id); + compute_gen_dot_product_kernel( + x_b, y_b, res_b, item_ct1, + [](auto val) { return conj(val); }); + }); }); } @@ -268,17 +268,16 @@ void compute_norm2(std::shared_ptr exec, const dim3 grid(num_batches); exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = - batch::extract_batch_item(x_ub, group_id); - const auto res_b = batch::extract_batch_item( - res_ub, group_id); - compute_norm2_kernel(x_b, res_b, item_ct1); - }); + cgh.parallel_for( + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto res_b = batch::extract_batch_item(res_ub, group_id); + compute_norm2_kernel(x_b, res_b, item_ct1); + }); }); } diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 
964bf094077..118d46d81a5 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -40,8 +40,24 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include -#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/components/prefix_sum_kernels.hpp" +#include "core/matrix/batch_struct.hpp" +#include "dpcpp/base/batch_struct.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/intrinsics.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/matrix/batch_struct.hpp" namespace gko { @@ -55,11 +71,46 @@ namespace dpcpp { namespace batch_dense { +#include "dpcpp/matrix/batch_dense_kernels.hpp.inc" + + template void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* a, + const batch::matrix::BatchDense* mat, const batch::MultiVector* b, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const size_type num_rows = x->get_common_size()[0]; + const size_type num_cols = x->get_common_size()[1]; + + const auto num_batch_items = x->get_num_batch_items(); + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + const auto x_ub = get_batch_struct(x); + const auto b_ub = get_batch_struct(b); + const auto mat_ub = get_batch_struct(mat); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + // Launch a kernel that has nbatches blocks, each block has max group size + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b.values, x_b.values, item_ct1); + }); + }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); @@ -68,10 +119,48 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* a, + const batch::matrix::BatchDense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, - batch::MultiVector* c) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const auto mat_ub = get_batch_struct(mat); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + const auto num_batch_items = mat_ub.num_batch_items; + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + + // Launch a kernel that has nbatches blocks, each block has max group size + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + 
batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b.values[0], mat_b, b_b.values, + beta_b.values[0], x_b.values, item_ct1); + }); + }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); diff --git a/dpcpp/matrix/batch_dense_kernels.hpp.inc b/dpcpp/matrix/batch_dense_kernels.hpp.inc new file mode 100644 index 00000000000..ba528ac31a4 --- /dev/null +++ b/dpcpp/matrix/batch_dense_kernels.hpp.inc @@ -0,0 +1,91 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +template +__dpct_inline__ void simple_apply_kernel( + const gko::batch::matrix::batch_dense::batch_item& mat, + const ValueType* const __restrict__ b, ValueType* const __restrict__ x, + sycl::nd_item<3>& item_ct1) +{ + constexpr auto tile_size = config::warp_size; + auto subg = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto subgroup = static_cast(subg); + const int subgroup_id = subgroup.get_group_id(); + const int subgroup_size = subgroup.get_local_range().size(); + const int num_subgroup = subgroup.get_group_range().size(); + + for (int row = subgroup_id; row < mat.num_rows; row += num_subgroup) { + ValueType temp = zero(); + for (int j = subgroup.get_local_id(); j < mat.num_cols; + j += subgroup_size) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += val * b[j]; + } + temp = ::gko::kernels::dpcpp::reduce( + subg, temp, [](ValueType v1, ValueType v2) { return v1 + v2; }); + if (subgroup.get_local_id() == 0) { + x[row] = temp; + } + } +} + + +template +__dpct_inline__ void advanced_apply_kernel( + const ValueType alpha, + const gko::batch::matrix::batch_dense::batch_item& mat, + const ValueType* const __restrict__ b, const ValueType beta, + ValueType* const __restrict__ x, sycl::nd_item<3>& item_ct1) +{ + constexpr auto tile_size = config::warp_size; + auto subg = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto subgroup = static_cast(subg); + const int subgroup_id = subgroup.get_group_id(); + const int subgroup_size = subgroup.get_local_range().size(); + const int num_subgroup = subgroup.get_group_range().size(); + + for (int row = subgroup_id; row < mat.num_rows; row += num_subgroup) { + ValueType temp = zero(); + for (int j = subgroup.get_local_id(); j < mat.num_cols; + j += subgroup_size) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += alpha * val * b[j]; + } + temp = ::gko::kernels::dpcpp::reduce( + subg, temp, [](ValueType v1, ValueType v2) { return v1 + v2; }); + if (subgroup.get_local_id() == 0) { + x[row] = temp + beta * x[row]; + } + } +} diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp new file mode 100644 index 00000000000..dd8c1bbbab6 --- /dev/null +++ b/dpcpp/matrix/batch_struct.hpp @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "dpcpp/base/config.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the required DPCPP scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch +get_batch_struct(const batch::matrix::BatchDense* const op) +{ + return {op->get_const_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch get_batch_struct( + batch::matrix::BatchDense* const op) +{ + return {op->get_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index a89abfb4ae4..9f3b17cd858 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,4 +1,4 @@ -ginkgo_create_common_test(batch_dense_kernels DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_test(batch_dense_kernels) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) From f4b9b869063229544690c6fc99e46ae1bfe75e8a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 5 Oct 2023 13:38:18 +0200 Subject: [PATCH 353/583] HIP and CUDA thrust fixes --- .../matrix/batch_dense_kernels.hpp.inc | 32 +++++++++---------- cuda/matrix/batch_dense_kernels.cu | 5 +++ hip/matrix/batch_dense_kernels.hip.cpp | 5 ++- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc index 6cae08eadb5..2f876332ae7 100644 --- a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc @@ -39,15 +39,15 @@ __device__ __forceinline__ void simple_apply( constexpr auto tile_size = config::warp_size; auto thread_block = group::this_thread_block(); - auto subwarp_grp = group::tiled_partition(thread_block); - const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); - const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); 
+ auto subgroup = group::tiled_partition(thread_block); + const auto subgroup_id = static_cast(threadIdx.x / tile_size); + const int num_subgroups_per_block = ceildiv(blockDim.x, tile_size); - for (int row = subwarp_grp_id; row < mat.num_rows; - row += num_subwarp_grps_per_block) { + for (int row = subgroup_id; row < mat.num_rows; + row += num_subgroups_per_block) { ValueType temp = zero(); - for (int j = subwarp_grp.thread_rank(); j < mat.num_cols; - j += subwarp_grp.size()) { + for (int j = subgroup.thread_rank(); j < mat.num_cols; + j += subgroup.size()) { const ValueType val = mat.values[row * mat.stride + j]; temp += val * b[j]; } @@ -55,7 +55,7 @@ __device__ __forceinline__ void simple_apply( // subgroup level reduction temp = reduce(subgroup, temp, thrust::plus{}); - if (subwarp_grp.thread_rank() == 0) { + if (subgroup.thread_rank() == 0) { x[row] = temp; } } @@ -101,15 +101,15 @@ __device__ __forceinline__ void advanced_apply( constexpr auto tile_size = config::warp_size; auto thread_block = group::this_thread_block(); - auto subwarp_grp = group::tiled_partition(thread_block); - const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); - const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + auto subgroup = group::tiled_partition(thread_block); + const auto subgroup_id = static_cast(threadIdx.x / tile_size); + const int num_subgroups_per_block = ceildiv(blockDim.x, tile_size); - for (int row = subwarp_grp_id; row < mat.num_rows; - row += num_subwarp_grps_per_block) { + for (int row = subgroup_id; row < mat.num_rows; + row += num_subgroups_per_block) { ValueType temp = zero(); - for (int j = subwarp_grp.thread_rank(); j < mat.num_cols; - j += subwarp_grp.size()) { + for (int j = subgroup.thread_rank(); j < mat.num_cols; + j += subgroup.size()) { const ValueType val = mat.values[row * mat.stride + j]; temp += alpha * val * b[j]; } @@ -117,7 +117,7 @@ __device__ __forceinline__ void advanced_apply( // subgroup level reduction temp = reduce(subgroup, temp, thrust::plus{}); - if (subwarp_grp.thread_rank() == 0) { + if (subgroup.thread_rank() == 0) { x[row] = temp + beta * x[row]; } } diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index 9d9cfcf6c8e..28d61f70731 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -33,6 +33,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_dense_kernels.hpp" +#include +#include + + #include @@ -42,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" #include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/base/thrust.cuh" #include "cuda/components/cooperative_groups.cuh" #include "cuda/components/reduction.cuh" #include "cuda/components/thread_ids.cuh" diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index 51f2237826b..20c46736026 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -34,6 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include +#include #include @@ -42,10 +44,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" -#include "hip/base/batch_struct.hpp" +#include "hip/base/batch_struct.hip.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipblas_bindings.hip.hpp" #include "hip/base/pointer_mode_guard.hip.hpp" +#include "hip/base/thrust.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" #include "hip/components/reduction.hip.hpp" #include "hip/components/thread_ids.hip.hpp" From ac61ccdf665447239a1e1273afbfb87673220496 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 5 Oct 2023 15:49:46 +0200 Subject: [PATCH 354/583] SYCL kernel fixes --- dpcpp/matrix/batch_dense_kernels.dp.cpp | 20 +++++++++------ dpcpp/matrix/batch_dense_kernels.hpp.inc | 31 +++++++++++++++--------- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 118d46d81a5..7f3dca70a32 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -80,10 +80,10 @@ void simple_apply(std::shared_ptr exec, const batch::MultiVector* b, batch::MultiVector* x) { - const size_type num_rows = x->get_common_size()[0]; - const size_type num_cols = x->get_common_size()[1]; + const size_type num_rows = mat->get_common_size()[0]; + const size_type num_cols = mat->get_common_size()[1]; - const auto num_batch_items = x->get_num_batch_items(); + const auto num_batch_items = mat->get_num_batch_items(); auto device = exec->get_queue()->get_device(); auto group_size = device.get_info(); @@ -100,14 +100,16 @@ void simple_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto mat_b = batch::matrix::extract_batch_item(mat_ub, group_id); const auto b_b = batch::extract_batch_item(b_ub, group_id); const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b.values, x_b.values, item_ct1); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); }); }); } @@ -145,7 +147,9 @@ void advanced_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto mat_b = @@ -156,8 +160,8 @@ void advanced_apply(std::shared_ptr exec, batch::extract_batch_item(alpha_ub, group_id); const auto beta_b = batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b.values[0], mat_b, b_b.values, - beta_b.values[0], x_b.values, item_ct1); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); }); }); } diff --git a/dpcpp/matrix/batch_dense_kernels.hpp.inc b/dpcpp/matrix/batch_dense_kernels.hpp.inc index ba528ac31a4..dacd31feade 100644 --- a/dpcpp/matrix/batch_dense_kernels.hpp.inc +++ b/dpcpp/matrix/batch_dense_kernels.hpp.inc @@ -33,7 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template __dpct_inline__ void simple_apply_kernel( const gko::batch::matrix::batch_dense::batch_item& mat, - const ValueType* const __restrict__ b, ValueType* const __restrict__ x, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& x, sycl::nd_item<3>& item_ct1) { constexpr auto tile_size = config::warp_size; @@ -42,19 +43,21 @@ __dpct_inline__ void simple_apply_kernel( const auto subgroup = static_cast(subg); const int subgroup_id = subgroup.get_group_id(); const int subgroup_size = subgroup.get_local_range().size(); - const int num_subgroup = subgroup.get_group_range().size(); + const int num_subgroups = subgroup.get_group_range().size(); - for (int row = subgroup_id; row < mat.num_rows; row += num_subgroup) { + for (int row = subgroup_id; row < mat.num_rows; row += num_subgroups) { ValueType temp = zero(); for (int j = subgroup.get_local_id(); j < mat.num_cols; j += subgroup_size) { const ValueType val = mat.values[row * mat.stride + j]; - temp += val * b[j]; + temp += val * b.values[j]; } + temp = ::gko::kernels::dpcpp::reduce( - subg, temp, [](ValueType v1, ValueType v2) { return v1 + v2; }); + subg, temp, [](ValueType a, ValueType b) { return a + b; }); + if (subgroup.get_local_id() == 0) { - x[row] = temp; + x.values[row] = temp; } } } @@ -62,10 +65,12 @@ __dpct_inline__ void simple_apply_kernel( template __dpct_inline__ void advanced_apply_kernel( - const ValueType alpha, + const gko::batch::multi_vector::batch_item& alpha, const gko::batch::matrix::batch_dense::batch_item& mat, - const ValueType* const __restrict__ b, const ValueType beta, - ValueType* const __restrict__ x, sycl::nd_item<3>& item_ct1) + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& beta, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) { constexpr auto tile_size = config::warp_size; auto subg = @@ -80,12 +85,14 @@ __dpct_inline__ void advanced_apply_kernel( for (int j = subgroup.get_local_id(); j < mat.num_cols; j += subgroup_size) { const ValueType val = mat.values[row * mat.stride + j]; - temp += alpha * val * b[j]; + temp += alpha.values[0] * val * b.values[j]; } + temp = ::gko::kernels::dpcpp::reduce( - subg, temp, [](ValueType v1, ValueType v2) { return v1 + v2; }); + subg, temp, [](ValueType a, ValueType b) { return a + b; }); + if (subgroup.get_local_id() == 0) { - x[row] = temp + beta * x[row]; + x.values[row] = temp + beta.values[0] * x.values[row]; } } } From f73196036f405cc0c13f862cc58843e88cf89cb2 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 5 Oct 2023 16:00:30 +0200 Subject: [PATCH 355/583] BatchDense -> batch::Dense --- .../batch_dense_kernel_launcher.hpp.inc | 4 +- core/base/batch_multi_vector.cpp | 7 +- core/matrix/batch_dense.cpp | 48 +++---- core/matrix/batch_dense_kernels.hpp | 4 +- core/test/matrix/batch_dense.cpp | 128 +++++++++--------- cuda/matrix/batch_dense_kernels.cu | 2 +- cuda/matrix/batch_struct.hpp | 4 +- dpcpp/matrix/batch_dense_kernels.dp.cpp | 6 +- dpcpp/matrix/batch_struct.hpp | 4 +- hip/matrix/batch_dense_kernels.hip.cpp | 2 +- hip/matrix/batch_struct.hip.hpp | 8 +- .../ginkgo/core/base/batch_multi_vector.hpp | 14 +- include/ginkgo/core/matrix/batch_dense.hpp | 59 ++++---- omp/matrix/batch_dense_kernels.cpp | 8 +- reference/matrix/batch_dense_kernels.cpp | 8 +- reference/matrix/batch_struct.hpp | 4 +- reference/test/matrix/batch_dense_kernels.cpp | 24 ++-- test/matrix/batch_dense_kernels.cpp | 10 +- 18 files changed, 169 insertions(+), 175 
deletions(-) diff --git a/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc index 668b0278680..23ae8ebd5f0 100644 --- a/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc +++ b/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, batch::MultiVector* x) { @@ -55,7 +55,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, batch::MultiVector* x) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index f6884ef523b..294fe45972a 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -292,12 +292,11 @@ void MultiVector::move_to( template -void MultiVector::convert_to( - matrix::BatchDense* result) const +void MultiVector::convert_to(matrix::Dense* result) const { auto exec = result->get_executor() != nullptr ? result->get_executor() : this->get_executor(); - auto tmp = gko::batch::matrix::BatchDense::create_const( + auto tmp = gko::batch::matrix::Dense::create_const( exec, this->get_size(), make_const_array_view(exec, this->get_num_stored_elements(), this->get_const_values())); @@ -306,7 +305,7 @@ void MultiVector::convert_to( template -void MultiVector::move_to(matrix::BatchDense* result) +void MultiVector::move_to(matrix::Dense* result) { this->convert_to(result); } diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index c9da010c228..75f29bc6b4c 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -84,7 +84,7 @@ batch_dim<2> compute_batch_size( template std::unique_ptr> -BatchDense::create_view_for_item(size_type item_id) +Dense::create_view_for_item(size_type item_id) { auto exec = this->get_executor(); auto num_rows = this->get_common_size()[0]; @@ -100,7 +100,7 @@ BatchDense::create_view_for_item(size_type item_id) template std::unique_ptr> -BatchDense::create_const_view_for_item(size_type item_id) const +Dense::create_const_view_for_item(size_type item_id) const { auto exec = this->get_executor(); auto num_rows = this->get_common_size()[0]; @@ -115,9 +115,8 @@ BatchDense::create_const_view_for_item(size_type item_id) const template -std::unique_ptr> -BatchDense::create_with_config_of( - ptr_param> other) +std::unique_ptr> Dense::create_with_config_of( + ptr_param> other) { // De-referencing `other` before calling the functions (instead of // using operator `->`) is currently required to be compatible with @@ -128,23 +127,21 @@ BatchDense::create_with_config_of( template -std::unique_ptr> -BatchDense::create_with_same_config() const +std::unique_ptr> Dense::create_with_same_config() + const { - return BatchDense::create(this->get_executor(), - this->get_size()); + return Dense::create(this->get_executor(), this->get_size()); } template -std::unique_ptr> -BatchDense::create_const( +std::unique_ptr> Dense::create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values) { // cast const-ness away, but return a const object afterwards, // so we can ensure that no modifications take place. 
- return std::unique_ptr(new BatchDense{ + return std::unique_ptr(new Dense{ exec, sizes, gko::detail::array_const_cast(std::move(values))}); } @@ -157,16 +154,16 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) template -BatchDense::BatchDense(std::shared_ptr exec, - const batch_dim<2>& size) - : EnableBatchLinOp>(exec, size), +Dense::Dense(std::shared_ptr exec, + const batch_dim<2>& size) + : EnableBatchLinOp>(exec, size), values_(exec, compute_num_elems(size)) {} template -void BatchDense::apply_impl(const MultiVector* b, - MultiVector* x) const +void Dense::apply_impl(const MultiVector* b, + MultiVector* x) const { GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); @@ -179,10 +176,10 @@ void BatchDense::apply_impl(const MultiVector* b, template -void BatchDense::apply_impl(const MultiVector* alpha, - const MultiVector* b, - const MultiVector* beta, - MultiVector* x) const +void Dense::apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const { GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); @@ -198,8 +195,8 @@ void BatchDense::apply_impl(const MultiVector* alpha, template -void BatchDense::convert_to( - BatchDense>* result) const +void Dense::convert_to( + Dense>* result) const { result->values_ = this->values_; result->set_size(this->get_size()); @@ -207,14 +204,13 @@ void BatchDense::convert_to( template -void BatchDense::move_to( - BatchDense>* result) +void Dense::move_to(Dense>* result) { this->convert_to(result); } -#define GKO_DECLARE_BATCH_DENSE_MATRIX(_type) class BatchDense<_type> +#define GKO_DECLARE_BATCH_DENSE_MATRIX(_type) class Dense<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_MATRIX); diff --git a/core/matrix/batch_dense_kernels.hpp b/core/matrix/batch_dense_kernels.hpp index 7f814e08b50..cb46b7291b8 100644 --- a/core/matrix/batch_dense_kernels.hpp +++ b/core/matrix/batch_dense_kernels.hpp @@ -51,14 +51,14 @@ namespace kernels { #define GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(_type) \ void simple_apply(std::shared_ptr exec, \ - const batch::matrix::BatchDense<_type>* a, \ + const batch::matrix::Dense<_type>* a, \ const batch::MultiVector<_type>* b, \ batch::MultiVector<_type>* c) #define GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL(_type) \ void advanced_apply(std::shared_ptr exec, \ const batch::MultiVector<_type>* alpha, \ - const batch::matrix::BatchDense<_type>* a, \ + const batch::matrix::Dense<_type>* a, \ const batch::MultiVector<_type>* b, \ const batch::MultiVector<_type>* beta, \ batch::MultiVector<_type>* c) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 02788e14b7d..6afe13a50af 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -48,15 +48,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template -class BatchDense : public ::testing::Test { +class Dense : public ::testing::Test { protected: using value_type = T; using DenseMtx = gko::matrix::Dense; using size_type = gko::size_type; - BatchDense() + Dense() : exec(gko::ReferenceExecutor::create()), - mtx(gko::batch::initialize< - gko::batch::matrix::BatchDense>( + mtx(gko::batch::initialize>( {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, exec)), @@ -66,7 +65,7 @@ class BatchDense : public ::testing::Test { static void assert_equal_to_original_mtx( - gko::batch::matrix::BatchDense* m) + gko::batch::matrix::Dense* m) { ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); @@ -85,41 +84,41 @@ class BatchDense : public ::testing::Test { ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); } - static void assert_empty(gko::batch::matrix::BatchDense* m) + static void assert_empty(gko::batch::matrix::Dense* m) { ASSERT_EQ(m->get_num_batch_items(), 0); ASSERT_EQ(m->get_num_stored_elements(), 0); } std::shared_ptr exec; - std::unique_ptr> mtx; + std::unique_ptr> mtx; std::unique_ptr> dense_mtx; }; -TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); +TYPED_TEST_SUITE(Dense, gko::test::ValueTypes); -TYPED_TEST(BatchDense, KnowsItsSizeAndValues) +TYPED_TEST(Dense, KnowsItsSizeAndValues) { this->assert_equal_to_original_mtx(this->mtx.get()); } -TYPED_TEST(BatchDense, CanBeEmpty) +TYPED_TEST(Dense, CanBeEmpty) { - auto empty = gko::batch::matrix::BatchDense::create(this->exec); + auto empty = gko::batch::matrix::Dense::create(this->exec); this->assert_empty(empty.get()); } -TYPED_TEST(BatchDense, ReturnsNullValuesArrayWhenEmpty) +TYPED_TEST(Dense, ReturnsNullValuesArrayWhenEmpty) { - auto empty = gko::batch::matrix::BatchDense::create(this->exec); + auto empty = gko::batch::matrix::Dense::create(this->exec); ASSERT_EQ(empty->get_const_values(), nullptr); } -TYPED_TEST(BatchDense, CanGetValuesForEntry) +TYPED_TEST(Dense, CanGetValuesForEntry) { using value_type = typename TestFixture::value_type; @@ -127,17 +126,16 @@ TYPED_TEST(BatchDense, CanGetValuesForEntry) } -TYPED_TEST(BatchDense, CanCreateDenseItemView) +TYPED_TEST(Dense, CanCreateDenseItemView) { GKO_ASSERT_MTX_NEAR(this->mtx->create_view_for_item(1), this->dense_mtx, 0.0); } -TYPED_TEST(BatchDense, CanBeCopied) +TYPED_TEST(Dense, CanBeCopied) { - auto mtx_copy = - gko::batch::matrix::BatchDense::create(this->exec); + auto mtx_copy = gko::batch::matrix::Dense::create(this->exec); mtx_copy->copy_from(this->mtx.get()); @@ -148,10 +146,9 @@ TYPED_TEST(BatchDense, CanBeCopied) } -TYPED_TEST(BatchDense, CanBeMoved) +TYPED_TEST(Dense, CanBeMoved) { - auto mtx_copy = - gko::batch::matrix::BatchDense::create(this->exec); + auto mtx_copy = gko::batch::matrix::Dense::create(this->exec); this->mtx->move_to(mtx_copy); @@ -159,7 +156,7 @@ TYPED_TEST(BatchDense, CanBeMoved) } -TYPED_TEST(BatchDense, CanBeCloned) +TYPED_TEST(Dense, CanBeCloned) { auto mtx_clone = this->mtx->clone(); @@ -168,7 +165,7 @@ TYPED_TEST(BatchDense, CanBeCloned) } -TYPED_TEST(BatchDense, CanBeCleared) +TYPED_TEST(Dense, CanBeCleared) { this->mtx->clear(); @@ -176,11 +173,11 @@ TYPED_TEST(BatchDense, CanBeCleared) } -TYPED_TEST(BatchDense, CanBeConstructedWithSize) +TYPED_TEST(Dense, CanBeConstructedWithSize) { using size_type = gko::size_type; - auto m = gko::batch::matrix::BatchDense::create( + auto m = gko::batch::matrix::Dense::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3})); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -189,7 +186,7 @@ 
TYPED_TEST(BatchDense, CanBeConstructedWithSize) } -TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) +TYPED_TEST(Dense, CanBeConstructedFromExistingData) { using value_type = typename TestFixture::value_type; using size_type = gko::size_type; @@ -203,7 +200,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) 6.0, -3.0}; // clang-format on - auto m = gko::batch::matrix::BatchDense::create( + auto m = gko::batch::matrix::Dense::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), gko::array::view(this->exec, 8, data)); @@ -219,7 +216,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) } -TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) +TYPED_TEST(Dense, CanBeConstructedFromExistingConstData) { using value_type = typename TestFixture::value_type; using size_type = gko::size_type; @@ -233,7 +230,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) 6.0, -3.0}; // clang-format on - auto m = gko::batch::matrix::BatchDense::create_const( + auto m = gko::batch::matrix::Dense::create_const( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), gko::array::const_view(this->exec, 8, data)); @@ -249,7 +246,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) } -TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) +TYPED_TEST(Dense, CanBeConstructedFromDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -260,15 +257,15 @@ TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::create_from_item< - gko::batch::matrix::BatchDense>( - this->exec, std::vector{mat1.get(), mat2.get()}); + auto m = + gko::batch::create_from_item>( + this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); } -TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) +TYPED_TEST(Dense, CanBeConstructedFromDenseMatricesByDuplication) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -279,17 +276,19 @@ TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::batch::create_from_item< - gko::batch::matrix::BatchDense>( - this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); - auto m = gko::batch::create_from_item< - gko::batch::matrix::BatchDense>(this->exec, 3, mat1.get()); + auto bat_m = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = + gko::batch::create_from_item>( + this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } -TYPED_TEST(BatchDense, CanBeConstructedByDuplicatingBatchDenseMatrices) +TYPED_TEST(Dense, CanBeConstructedByDuplicatingDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -300,22 +299,23 @@ TYPED_TEST(BatchDense, CanBeConstructedByDuplicatingBatchDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::create_from_item< - gko::batch::matrix::BatchDense>( - this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::batch::create_from_item< - gko::batch::matrix::BatchDense>( - this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), - mat2.get(), mat1.get(), mat2.get()}); + auto m = + 
gko::batch::create_from_item>( + this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); - auto m2 = gko::batch::duplicate>( + auto m2 = gko::batch::duplicate>( this->exec, 3, m.get()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } -TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) +TYPED_TEST(Dense, CanBeUnbatchedIntoDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -326,7 +326,7 @@ TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) this->exec); auto dense_mats = - gko::batch::unbatch>( + gko::batch::unbatch>( this->mtx.get()); GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); @@ -334,10 +334,10 @@ TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) } -TYPED_TEST(BatchDense, CanBeListConstructed) +TYPED_TEST(Dense, CanBeListConstructed) { using value_type = typename TestFixture::value_type; - auto m = gko::batch::initialize>( + auto m = gko::batch::initialize>( {{1.0, 2.0}, {1.0, 3.0}}, this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -349,11 +349,11 @@ TYPED_TEST(BatchDense, CanBeListConstructed) } -TYPED_TEST(BatchDense, CanBeListConstructedByCopies) +TYPED_TEST(Dense, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; - auto m = gko::batch::initialize>( + auto m = gko::batch::initialize>( 2, I({1.0, 2.0}), this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -365,12 +365,12 @@ TYPED_TEST(BatchDense, CanBeListConstructedByCopies) } -TYPED_TEST(BatchDense, CanBeDoubleListConstructed) +TYPED_TEST(Dense, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; using T = value_type; - auto m = gko::batch::initialize>( + auto m = gko::batch::initialize>( {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, this->exec); @@ -389,7 +389,7 @@ TYPED_TEST(BatchDense, CanBeDoubleListConstructed) } -TYPED_TEST(BatchDense, CanBeReadFromMatrixData) +TYPED_TEST(Dense, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; using index_type = int; @@ -401,8 +401,8 @@ TYPED_TEST(BatchDense, CanBeReadFromMatrixData) {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); auto m = gko::batch::read>( - this->exec, vec_data); + gko::batch::matrix::Dense>(this->exec, + vec_data); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); @@ -416,7 +416,7 @@ TYPED_TEST(BatchDense, CanBeReadFromMatrixData) } -TYPED_TEST(BatchDense, CanBeReadFromSparseMatrixData) +TYPED_TEST(Dense, CanBeReadFromSparseMatrixData) { using value_type = typename TestFixture::value_type; using index_type = int; @@ -427,8 +427,8 @@ TYPED_TEST(BatchDense, CanBeReadFromSparseMatrixData) {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); auto m = gko::batch::read>( - this->exec, vec_data); + gko::batch::matrix::Dense>(this->exec, + vec_data); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); @@ -442,14 +442,14 @@ TYPED_TEST(BatchDense, CanBeReadFromSparseMatrixData) } -TYPED_TEST(BatchDense, GeneratesCorrectMatrixData) +TYPED_TEST(Dense, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; using index_type = int; using tpl = typename gko::matrix_data::nonzero_type; auto data = gko::batch::write>( + 
gko::batch::matrix::Dense>( this->mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index 28d61f70731..4f1dbc8f4d4 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -58,7 +58,7 @@ namespace gko { namespace kernels { namespace cuda { /** - * @brief The BatchDense matrix format namespace. + * @brief The Dense matrix format namespace. * * @ingroup batch_dense */ diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 202eb91a366..56af3c5ba7e 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -65,7 +65,7 @@ namespace cuda { */ template inline batch::matrix::batch_dense::uniform_batch> -get_batch_struct(const batch::matrix::BatchDense* const op) +get_batch_struct(const batch::matrix::Dense* const op) { return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -79,7 +79,7 @@ get_batch_struct(const batch::matrix::BatchDense* const op) */ template inline batch::matrix::batch_dense::uniform_batch> -get_batch_struct(batch::matrix::BatchDense* const op) +get_batch_struct(batch::matrix::Dense* const op) { return {as_cuda_type(op->get_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 7f3dca70a32..4552f918c60 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -64,7 +64,7 @@ namespace gko { namespace kernels { namespace dpcpp { /** - * @brief The BatchDense matrix format namespace. + * @brief The Dense matrix format namespace. * * @ingroup batch_dense */ @@ -76,7 +76,7 @@ namespace batch_dense { template void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, batch::MultiVector* x) { @@ -121,7 +121,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, batch::MultiVector* x) diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index dd8c1bbbab6..e44bc394667 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -63,7 +63,7 @@ namespace dpcpp { */ template inline batch::matrix::batch_dense::uniform_batch -get_batch_struct(const batch::matrix::BatchDense* const op) +get_batch_struct(const batch::matrix::Dense* const op) { return {op->get_const_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -77,7 +77,7 @@ get_batch_struct(const batch::matrix::BatchDense* const op) */ template inline batch::matrix::batch_dense::uniform_batch get_batch_struct( - batch::matrix::BatchDense* const op) + batch::matrix::Dense* const op) { return {op->get_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index 20c46736026..aa6d717438e 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -60,7 +60,7 @@ namespace gko { namespace kernels { namespace hip { /** - * @brief The BatchDense matrix format namespace. + * @brief The Dense matrix format namespace. 
* * @ingroup batch_dense */ diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index 0d5dfb46a1b..c75a6c7f0a3 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -41,8 +41,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" -#include "hip/base/config.hpp" -#include "hip/base/types.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/types.hip.hpp" namespace gko { @@ -65,7 +65,7 @@ namespace hip { */ template inline batch::matrix::batch_dense::uniform_batch> -get_batch_struct(const batch::matrix::BatchDense* const op) +get_batch_struct(const batch::matrix::Dense* const op) { return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -79,7 +79,7 @@ get_batch_struct(const batch::matrix::BatchDense* const op) */ template inline batch::matrix::batch_dense::uniform_batch> -get_batch_struct(batch::matrix::BatchDense* const op) +get_batch_struct(batch::matrix::Dense* const op) { return {as_hip_type(op->get_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 6b3b207c76c..7830a4c6efb 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -56,7 +56,7 @@ namespace matrix { template -class BatchDense; +class Dense; } @@ -91,20 +91,20 @@ class MultiVector public EnablePolymorphicAssignment>, public EnableCreateMethod>, public ConvertibleTo>>, - public ConvertibleTo> { + public ConvertibleTo> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class MultiVector>; friend class MultiVector>; - friend class matrix::BatchDense; + friend class matrix::Dense; public: using EnablePolymorphicAssignment::convert_to; using EnablePolymorphicAssignment::move_to; using ConvertibleTo>>::convert_to; using ConvertibleTo>>::move_to; - using ConvertibleTo>::convert_to; - using ConvertibleTo>::move_to; + using ConvertibleTo>::convert_to; + using ConvertibleTo>::move_to; using value_type = ValueType; using index_type = int32; @@ -126,9 +126,9 @@ class MultiVector void move_to(MultiVector>* result) override; - void convert_to(matrix::BatchDense* result) const override; + void convert_to(matrix::Dense* result) const override; - void move_to(matrix::BatchDense* result) override; + void move_to(matrix::Dense* result) override; /** * Creates a mutable view (of matrix::Dense type) of one item of the Batch diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 0457f444c5a..86cd78eadc8 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -55,7 +55,7 @@ namespace matrix { /** - * BatchDense is a batch matrix format which explicitly stores all values of the + * Dense is a batch matrix format which explicitly stores all values of the * matrix in each of the batches. 
* * The values in each of the batches are stored in row-major format (values @@ -71,38 +71,37 @@ namespace matrix { * @ingroup BatchLinOp */ template -class BatchDense : public EnableBatchLinOp>, - public EnableCreateMethod>, - public ConvertibleTo>> { - friend class EnableCreateMethod; - friend class EnablePolymorphicObject; - friend class BatchDense>; - friend class BatchDense>; +class Dense : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class Dense>; + friend class Dense>; public: - using EnableBatchLinOp::convert_to; - using EnableBatchLinOp::move_to; + using EnableBatchLinOp::convert_to; + using EnableBatchLinOp::move_to; using value_type = ValueType; using index_type = int32; - using transposed_type = BatchDense; + using transposed_type = Dense; using unbatch_type = gko::matrix::Dense; - using absolute_type = remove_complex; - using complex_type = to_complex; + using absolute_type = remove_complex; + using complex_type = to_complex; /** - * Creates a BatchDense matrix with the configuration of another BatchDense + * Creates a Dense matrix with the configuration of another Dense * matrix. * * @param other The other matrix whose configuration needs to copied. */ - static std::unique_ptr create_with_config_of( - ptr_param other); + static std::unique_ptr create_with_config_of( + ptr_param other); - void convert_to( - BatchDense>* result) const override; + void convert_to(Dense>* result) const override; - void move_to(BatchDense>* result) override; + void move_to(Dense>* result) override; /** @@ -250,7 +249,7 @@ class BatchDense : public EnableBatchLinOp>, * array (if it resides on the same executor as the vector) or a copy of the * array on the correct executor. */ - static std::unique_ptr> create_const( + static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values); @@ -277,16 +276,16 @@ class BatchDense : public EnableBatchLinOp>, protected: /** - * Creates an uninitialized BatchDense matrix of the specified size. + * Creates an uninitialized Dense matrix of the specified size. * * @param exec Executor associated to the matrix * @param size size of the matrix */ - BatchDense(std::shared_ptr exec, - const batch_dim<2>& size = batch_dim<2>{}); + Dense(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}); /** - * Creates a BatchDense matrix from an already allocated (and initialized) + * Creates a Dense matrix from an already allocated (and initialized) * array. * * @tparam ValuesArray type of array of values @@ -303,9 +302,9 @@ class BatchDense : public EnableBatchLinOp>, * original array data will not be used in the matrix. */ template - BatchDense(std::shared_ptr exec, const batch_dim<2>& size, - ValuesArray&& values) - : EnableBatchLinOp(exec, size), + Dense(std::shared_ptr exec, const batch_dim<2>& size, + ValuesArray&& values) + : EnableBatchLinOp(exec, size), values_{exec, std::forward(values)} { // Ensure that the values array has the correct size @@ -314,12 +313,12 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Creates a BatchDense matrix with the same configuration as the callers + * Creates a Dense matrix with the same configuration as the callers * matrix. * - * @returns a BatchDense matrix with the same configuration as the caller. + * @returns a Dense matrix with the same configuration as the caller. 
*/ - std::unique_ptr create_with_same_config() const; + std::unique_ptr create_with_same_config() const; virtual void apply_impl(const MultiVector* b, MultiVector* x) const; diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp index a767215c844..2d0b7ed4d40 100644 --- a/omp/matrix/batch_dense_kernels.cpp +++ b/omp/matrix/batch_dense_kernels.cpp @@ -50,8 +50,8 @@ namespace gko { namespace kernels { namespace omp { /** - * @brief The BatchDense matrix format namespace. - * @ref BatchDense + * @brief The Dense matrix format namespace. + * @ref Dense * @ingroup batch_dense */ namespace batch_dense { @@ -62,7 +62,7 @@ namespace batch_dense { template void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, batch::MultiVector* x) { @@ -85,7 +85,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, batch::MultiVector* x) diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp index f42d9a81d1f..3d7ef03a3bd 100644 --- a/reference/matrix/batch_dense_kernels.cpp +++ b/reference/matrix/batch_dense_kernels.cpp @@ -51,8 +51,8 @@ namespace gko { namespace kernels { namespace reference { /** - * @brief The BatchDense matrix format namespace. - * @ref BatchDense + * @brief The Dense matrix format namespace. + * @ref Dense * @ingroup batch_dense */ namespace batch_dense { @@ -63,7 +63,7 @@ namespace batch_dense { template void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, batch::MultiVector* x) { @@ -85,7 +85,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, batch::MultiVector* x) diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index dee7c71948a..40e2cfc2078 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -66,7 +66,7 @@ namespace host { */ template inline batch::matrix::batch_dense::uniform_batch -get_batch_struct(const batch::matrix::BatchDense* const op) +get_batch_struct(const batch::matrix::Dense* const op) { return {op->get_const_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -80,7 +80,7 @@ get_batch_struct(const batch::matrix::BatchDense* const op) */ template inline batch::matrix::batch_dense::uniform_batch get_batch_struct( - batch::matrix::BatchDense* const op) + batch::matrix::Dense* const op) { return {op->get_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp index 8e2e522e5f4..e1689352cde 100644 --- a/reference/test/matrix/batch_dense_kernels.cpp +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -53,16 +53,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
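// Usage sketch for the two apply overloads declared above: the simple overload
// computes x = A * b and the advanced overload computes x = alpha * A * b +
// beta * x, item by item.  This is an illustration only, assuming an installed
// Ginkgo; the function name and all literal values are made up.
#include <ginkgo/ginkgo.hpp>

void batch_dense_apply_sketch()
{
    auto exec = gko::ReferenceExecutor::create();
    using Mtx = gko::batch::matrix::Dense<double>;
    using MVec = gko::batch::MultiVector<double>;
    // A batch of two 2x2 matrices stored in one Dense object.
    auto A = gko::batch::initialize<Mtx>(
        {{{1.0, 2.0}, {3.0, 4.0}}, {{2.0, 0.0}, {0.0, 2.0}}}, exec);
    // Two right-hand-side columns per batch item.
    auto b = gko::batch::initialize<MVec>(
        {{{1.0, 0.5}, {1.0, 0.5}}, {{1.0, 2.0}, {1.0, 2.0}}}, exec);
    auto x = MVec::create(exec, gko::batch_dim<2>(2, gko::dim<2>{2, 2}));
    A->apply(b.get(), x.get());  // x_i = A_i * b_i for each batch item i
}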
template -class BatchDense : public ::testing::Test { +class Dense : public ::testing::Test { protected: using value_type = T; using size_type = gko::size_type; - using Mtx = gko::batch::matrix::BatchDense; + using Mtx = gko::batch::matrix::Dense; using MVec = gko::batch::MultiVector; using DenseMtx = gko::matrix::Dense; using ComplexMtx = gko::to_complex; using RealMtx = gko::remove_complex; - BatchDense() + Dense() : exec(gko::ReferenceExecutor::create()), mtx_0(gko::batch::initialize( {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, @@ -111,10 +111,10 @@ class BatchDense : public ::testing::Test { }; -TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); +TYPED_TEST_SUITE(Dense, gko::test::ValueTypes); -TYPED_TEST(BatchDense, AppliesToBatchMultiVector) +TYPED_TEST(Dense, AppliesToBatchMultiVector) { using T = typename TestFixture::value_type; @@ -129,7 +129,7 @@ TYPED_TEST(BatchDense, AppliesToBatchMultiVector) } -TYPED_TEST(BatchDense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) +TYPED_TEST(Dense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) { using Mtx = typename TestFixture::Mtx; using MVec = typename TestFixture::MVec; @@ -156,7 +156,7 @@ TYPED_TEST(BatchDense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) } -TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchMultiVector) +TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) { using Mtx = typename TestFixture::Mtx; using MVec = typename TestFixture::MVec; @@ -183,7 +183,7 @@ TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchMultiVector) } -TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultCols) +TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultCols) { using MVec = typename TestFixture::MVec; auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); @@ -193,7 +193,7 @@ TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultCols) } -TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultRows) +TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultRows) { using MVec = typename TestFixture::MVec; auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); @@ -203,7 +203,7 @@ TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultRows) } -TYPED_TEST(BatchDense, ApplyFailsOnWrongInnerDimension) +TYPED_TEST(Dense, ApplyFailsOnWrongInnerDimension) { using MVec = typename TestFixture::MVec; auto res = @@ -214,7 +214,7 @@ TYPED_TEST(BatchDense, ApplyFailsOnWrongInnerDimension) } -TYPED_TEST(BatchDense, AdvancedApplyFailsOnWrongInnerDimension) +TYPED_TEST(Dense, AdvancedApplyFailsOnWrongInnerDimension) { using MVec = typename TestFixture::MVec; auto res = @@ -230,7 +230,7 @@ TYPED_TEST(BatchDense, AdvancedApplyFailsOnWrongInnerDimension) } -TYPED_TEST(BatchDense, AdvancedApplyFailsOnWrongAlphaDimension) +TYPED_TEST(Dense, AdvancedApplyFailsOnWrongAlphaDimension) { using MVec = typename TestFixture::MVec; auto res = diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index 7d44f29899c..b32f1063377 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -53,13 +53,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
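// Shape sketch for the conformity checks exercised by the Apply*Fails* tests
// above: for x = A * b every batch item must satisfy A: n x k, b: k x m,
// x: n x m, with one common size shared by all items.  The sizes below are
// illustrative and the objects stay uninitialized, since only shapes matter.
#include <ginkgo/ginkgo.hpp>

void batch_dense_apply_shapes_sketch()
{
    auto exec = gko::ReferenceExecutor::create();
    using Mtx = gko::batch::matrix::Dense<double>;
    using MVec = gko::batch::MultiVector<double>;
    const gko::size_type num_items = 2;
    auto A = Mtx::create(exec, gko::batch_dim<2>(num_items, gko::dim<2>{2, 3}));
    auto b = MVec::create(exec, gko::batch_dim<2>(num_items, gko::dim<2>{3, 1}));
    auto x = MVec::create(exec, gko::batch_dim<2>(num_items, gko::dim<2>{2, 1}));
    A->apply(b.get(), x.get());  // (2x3) * (3x1) -> (2x1) per item: conforming
    // A result with 3 rows per item would instead fail the check covered by
    // the ApplyFailsOnWrongNumberOfResultRows test above.
}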
#include "test/utils/executor.hpp" -class BatchDense : public CommonTestFixture { +class Dense : public CommonTestFixture { protected: using vtype = double; - using Mtx = gko::batch::matrix::BatchDense; + using Mtx = gko::batch::matrix::Dense; using MVec = gko::batch::MultiVector; - BatchDense() : rand_engine(15) {} + Dense() : rand_engine(15) {} template std::unique_ptr gen_mtx(const gko::size_type num_batch_items, @@ -107,7 +107,7 @@ class BatchDense : public CommonTestFixture { }; -TEST_F(BatchDense, SingleVectorApplyIsEquivalentToRef) +TEST_F(Dense, SingleVectorApplyIsEquivalentToRef) { set_up_apply_data(1); @@ -118,7 +118,7 @@ TEST_F(BatchDense, SingleVectorApplyIsEquivalentToRef) } -TEST_F(BatchDense, SingleVectorAdvancedApplyIsEquivalentToRef) +TEST_F(Dense, SingleVectorAdvancedApplyIsEquivalentToRef) { set_up_apply_data(1); From 37ce4f6095b0462cae1cbe49919f44c63bea5dd7 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 5 Oct 2023 16:54:40 +0200 Subject: [PATCH 356/583] Doc updates and multivector view --- core/matrix/batch_dense.cpp | 32 +++++++ core/matrix/batch_struct.hpp | 4 +- core/test/matrix/batch_dense.cpp | 12 +++ cuda/matrix/batch_struct.hpp | 4 +- dpcpp/matrix/batch_struct.hpp | 4 +- hip/matrix/batch_struct.hip.hpp | 4 +- include/ginkgo/core/matrix/batch_dense.hpp | 102 +++++++++++++-------- reference/matrix/batch_struct.hpp | 4 +- 8 files changed, 120 insertions(+), 46 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 75f29bc6b4c..a864b4114c2 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -82,6 +82,38 @@ batch_dim<2> compute_batch_size( } // namespace detail +template +std::unique_ptr> +Dense::create_multi_vector_view() +{ + auto exec = this->get_executor(); + auto num_batch_items = this->get_num_batch_items(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mvec = MultiVector::create( + exec, this->get_size(), + make_array_view(exec, num_batch_items * num_rows * stride, + this->get_values())); + return mvec; +} + + +template +std::unique_ptr> +Dense::create_const_multi_vector_view() const +{ + auto exec = this->get_executor(); + auto num_batch_items = this->get_num_batch_items(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mvec = MultiVector::create_const( + exec, this->get_size(), + make_const_array_view(exec, num_batch_items * num_rows * stride, + this->get_const_values())); + return mvec; +} + + template std::unique_ptr> Dense::create_view_for_item(size_type item_id) diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index 37c297bb6b5..93b2b027ceb 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -46,7 +46,7 @@ namespace batch_dense { /** - * Encapsulates one matrix from a batch of multi-vectors. + * Encapsulates one matrix from a batch of dense matrices. */ template struct batch_item { @@ -59,7 +59,7 @@ struct batch_item { /** - * A 'simple' structure to store a global uniform batch of multi-vectors. + * A 'simple' structure to store a global uniform batch of dense matrices. 
*/ template struct uniform_batch { diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 6afe13a50af..36fc3f2ee4a 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -59,6 +59,10 @@ class Dense : public ::testing::Test { {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, exec)), + mvec(gko::batch::initialize>( + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)), dense_mtx(gko::initialize>( {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)) {} @@ -92,6 +96,7 @@ class Dense : public ::testing::Test { std::shared_ptr exec; std::unique_ptr> mtx; + std::unique_ptr> mvec; std::unique_ptr> dense_mtx; }; @@ -133,6 +138,13 @@ TYPED_TEST(Dense, CanCreateDenseItemView) } +TYPED_TEST(Dense, CanCreateMultiVectorView) +{ + GKO_ASSERT_BATCH_MTX_NEAR(this->mtx->create_multi_vector_view(), this->mvec, + 0.0); +} + + TYPED_TEST(Dense, CanBeCopied) { auto mtx_copy = gko::batch::matrix::Dense::create(this->exec); diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 56af3c5ba7e..19b006d26cd 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -61,7 +61,7 @@ namespace cuda { /** - * Generates an immutable uniform batch struct from a batch of multi-vectors. + * Generates an immutable uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch> @@ -75,7 +75,7 @@ get_batch_struct(const batch::matrix::Dense* const op) /** - * Generates a uniform batch struct from a batch of multi-vectors. + * Generates a uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch> diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index e44bc394667..cd5298a4409 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -59,7 +59,7 @@ namespace dpcpp { /** - * Generates an immutable uniform batch struct from a batch of multi-vectors. + * Generates an immutable uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch @@ -73,7 +73,7 @@ get_batch_struct(const batch::matrix::Dense* const op) /** - * Generates a uniform batch struct from a batch of multi-vectors. + * Generates a uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch get_batch_struct( diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index c75a6c7f0a3..25c73d45abc 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -61,7 +61,7 @@ namespace hip { /** - * Generates an immutable uniform batch struct from a batch of multi-vectors. + * Generates an immutable uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch> @@ -75,7 +75,7 @@ get_batch_struct(const batch::matrix::Dense* const op) /** - * Generates a uniform batch struct from a batch of multi-vectors. + * Generates a uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch> diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 86cd78eadc8..d713760947e 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -59,21 +59,24 @@ namespace matrix { * matrix in each of the batches. 
* * The values in each of the batches are stored in row-major format (values - * belonging to the same row appear consecutive in the memory). Optionally, rows - * can be padded for better memory access. + * belonging to the same row appear consecutive in the memory and the values of + * each batch item are also stored consecutively in memory). + * + * @note Though the storage layout is similar to the multi-vector object, the + * class semantics and the operations it aims to provide is different. Hence it + * is recommended to create multi-vector objects if the user means to view the + * data as a set of vectors. * * @tparam ValueType precision of matrix elements * - * @note While this format is not very useful for storing sparse matrices, it - * is often suitable to store vectors, and sets of vectors. * @ingroup batch_dense * @ingroup mat_formats * @ingroup BatchLinOp */ template -class Dense : public EnableBatchLinOp>, - public EnableCreateMethod>, - public ConvertibleTo>> { +class Dense final : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo>> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class Dense>; @@ -103,16 +106,31 @@ class Dense : public EnableBatchLinOp>, void move_to(Dense>* result) override; + /** + * Creates a mutable view (of MultiVector type) of the data owned by the + * matrix::Dense object. Does not perform any deep copies, but only + * returns a view of the underlying data. + * + * @return a MultiVector object with a view of the data from the batch + * dense matrix. + */ + std::unique_ptr> create_multi_vector_view(); + + /** + * @copydoc create_const_multi_vector_view() + */ + std::unique_ptr> + create_const_multi_vector_view() const; /** - * Creates a mutable view (of matrix::Dense type) of one item of the Batch - * MultiVector object. Does not perform any deep copies, but - * only returns a view of the data. + * Creates a mutable view (of matrix::Dense type) of one item of the + * batch::matrix::Dense object. Does not perform any deep + * copies, but only returns a view of the data. * * @param item_id The index of the batch item * - * @return a matrix::Dense object with the data from the batch item at the - * given index. + * @return a batch::matrix::Dense object with the data from the batch item + * at the given index. */ std::unique_ptr create_view_for_item(size_type item_id); @@ -148,8 +166,8 @@ class Dense : public EnableBatchLinOp>, * @param row the row of the requested element * @param col the column of the requested element * - * @note the method has to be called on the same Executor the vector is - * stored at (e.g. trying to call this method on a GPU multi-vector + * @note the method has to be called on the same Executor the matrix is + * stored at (e.g. trying to call this method on a GPU Dense object * from the OMP results in a runtime error) */ value_type& at(size_type batch_id, size_type row, size_type col) @@ -159,7 +177,7 @@ class Dense : public EnableBatchLinOp>, } /** - * @copydoc MultiVector::at(size_type, size_type, size_type) + * @copydoc Dense::at(size_type, size_type, size_type) */ value_type at(size_type batch_id, size_type row, size_type col) const { @@ -170,15 +188,15 @@ class Dense : public EnableBatchLinOp>, /** * Returns a single element for a particular batch item. * - * Useful for iterating across all elements of the vector. + * Useful for iterating across all elements of the matrix. * However, it is less efficient than the two-parameter variant of this * method. 
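// Usage sketch for the item view and element access documented above: the view
// is a (non-batched) gko::matrix::Dense that shares the storage of one batch
// item.  Assumes an installed Ginkgo; indices and values are illustrative.
#include <ginkgo/ginkgo.hpp>

void item_view_sketch()
{
    auto exec = gko::ReferenceExecutor::create();
    using Mtx = gko::batch::matrix::Dense<double>;
    auto mat = gko::batch::initialize<Mtx>(
        {{{1.0, 2.0}, {3.0, 4.0}}, {{5.0, 6.0}, {7.0, 8.0}}}, exec);
    // View of the second batch item (item_id == 1), no deep copy.
    auto item = mat->create_view_for_item(1);
    // Element access on the batch object itself; host executors only.
    auto value = mat->at(1, 0, 1);  // row 0, column 1 of item 1, i.e. 6.0
    // item->at(0, 1) refers to the same stored entry.
    (void)value;
}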
* * @param batch_id the batch item index to be queried * @param idx a linear index of the requested element * - * @note the method has to be called on the same Executor the vector is - * stored at (e.g. trying to call this method on a GPU multi-vector + * @note the method has to be called on the same Executor the matrix is + * stored at (e.g. trying to call this method on a GPU Dense object * from the OMP results in a runtime error) */ ValueType& at(size_type batch_id, size_type idx) noexcept @@ -187,7 +205,7 @@ class Dense : public EnableBatchLinOp>, } /** - * @copydoc MultiVector::at(size_type, size_type, size_type) + * @copydoc Dense::at(size_type, size_type, size_type) */ ValueType at(size_type batch_id, size_type idx) const noexcept { @@ -195,7 +213,7 @@ class Dense : public EnableBatchLinOp>, } /** - * Returns a pointer to the array of values of the multi-vector for a + * Returns a pointer to the array of values of the matrix for a * specific batch item. * * @param batch_id the id of the batch item. @@ -236,30 +254,45 @@ class Dense : public EnableBatchLinOp>, return values_.get_num_elems(); } - /** * Creates a constant (immutable) batch dense matrix from a constant * array. * - * @param exec the executor to create the vector on - * @param size the dimensions of the vector - * @param values the value array of the vector + * @param exec the executor to create the matrix on + * @param size the dimensions of the matrix + * @param values the value array of the matrix * - * @return A smart pointer to the constant multi-vector wrapping the input - * array (if it resides on the same executor as the vector) or a copy of the + * @return A smart pointer to the constant matrix wrapping the input + * array (if it resides on the same executor as the matrix) or a copy of the * array on the correct executor. */ static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values); - + /** + * Apply the matrix to a multi-vector. Represents the matrix vector + * multiplication, x = A * b, where x and b are both multi-vectors. + * + * @param b the multi-vector to be applied to + * @param x the output multi-vector + */ void apply(const MultiVector* b, MultiVector* x) const { this->apply_impl(b, x); } + /** + * Apply the matrix to a multi-vector with a linear combination of the given + * input vector. Represents the matrix vector multiplication, x = alpha* A * + * b + beta * x, where x and b are both multi-vectors. + * + * @param alpha the scalar to scale the matrix-vector product with + * @param b the multi-vector to be applied to + * @param beta the scalar to scale the x vector with + * @param x the output multi-vector + */ void apply(const MultiVector* alpha, const MultiVector* b, const MultiVector* beta, @@ -293,9 +326,6 @@ class Dense : public EnableBatchLinOp>, * @param exec Executor associated to the matrix * @param size sizes of the batch matrices in a batch_dim object * @param values array of matrix values - * @param strides stride of the rows (i.e. 
offset between the first - * elements of two consecutive rows, expressed as the - * number of matrix elements) * * @note If `values` is not an rvalue, not an array of ValueType, or is on * the wrong executor, an internal copy will be created, and the @@ -320,13 +350,13 @@ class Dense : public EnableBatchLinOp>, */ std::unique_ptr create_with_same_config() const; - virtual void apply_impl(const MultiVector* b, - MultiVector* x) const; + void apply_impl(const MultiVector* b, + MultiVector* x) const; - virtual void apply_impl(const MultiVector* alpha, - const MultiVector* b, - const MultiVector* beta, - MultiVector* x) const; + void apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const; size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 40e2cfc2078..1a759cec2a9 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -62,7 +62,7 @@ namespace host { /** - * Generates an immutable uniform batch struct from a batch of multi-vectors. + * Generates an immutable uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch @@ -76,7 +76,7 @@ get_batch_struct(const batch::matrix::Dense* const op) /** - * Generates a uniform batch struct from a batch of multi-vectors. + * Generates a uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch get_batch_struct( From 33c71c13955e63c94d817bdc2d3f4fe1d83ac192 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Fri, 6 Oct 2023 07:12:03 +0000 Subject: [PATCH 357/583] Format files Co-authored-by: Pratik Nayak --- cuda/matrix/batch_struct.hpp | 4 +- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 52 ++++++++++--------- dpcpp/matrix/batch_dense_kernels.dp.cpp | 54 ++++++++++---------- dpcpp/matrix/batch_struct.hpp | 4 +- hip/matrix/batch_struct.hip.hpp | 10 ++-- reference/matrix/batch_struct.hpp | 4 +- 6 files changed, 69 insertions(+), 59 deletions(-) diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 19b006d26cd..f191953f7b9 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -34,13 +34,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
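// Index sketch for the storage layout behind linearize_index() above, as
// described in the class documentation: values are stored item by item and
// row-major within an item, with the stride equal to the number of columns of
// the common size.  The numbers below are illustrative.
#include <ginkgo/ginkgo.hpp>

void linearized_index_sketch()
{
    const gko::size_type num_rows = 2;
    const gko::size_type num_cols = 3;  // also the stride for this format
    const gko::size_type batch = 1;
    const gko::size_type row = 0;
    const gko::size_type col = 2;
    // Flat position of element (batch, row, col) in the values array.
    const auto flat = batch * num_rows * num_cols + row * num_cols + col;
    // Here flat == 8, so Dense<double>::at(1, 0, 2) reads get_values()[8].
    (void)flat;
}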
#define GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ +#include "core/matrix/batch_struct.hpp" + + #include #include #include #include "core/base/batch_struct.hpp" -#include "core/matrix/batch_struct.hpp" #include "cuda/base/config.hpp" #include "cuda/base/types.hpp" diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 12648b81e00..e0bc15fdc61 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -194,9 +194,9 @@ void compute_dot(std::shared_ptr exec, // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto x_b = batch::extract_batch_item(x_ub, group_id); @@ -232,18 +232,19 @@ void compute_conj_dot(std::shared_ptr exec, exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto y_b = batch::extract_batch_item(y_ub, group_id); - const auto res_b = batch::extract_batch_item(res_ub, group_id); - compute_gen_dot_product_kernel( - x_b, y_b, res_b, item_ct1, - [](auto val) { return conj(val); }); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto y_b = batch::extract_batch_item(y_ub, group_id); + const auto res_b = + batch::extract_batch_item(res_ub, group_id); + compute_gen_dot_product_kernel( + x_b, y_b, res_b, item_ct1, + [](auto val) { return conj(val); }); + }); }); } @@ -268,16 +269,17 @@ void compute_norm2(std::shared_ptr exec, const dim3 grid(num_batches); exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto res_b = batch::extract_batch_item(res_ub, group_id); - compute_norm2_kernel(x_b, res_b, item_ct1); - }); + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = + batch::extract_batch_item(x_ub, group_id); + const auto res_b = batch::extract_batch_item( + res_ub, group_id); + compute_norm2_kernel(x_b, res_b, item_ct1); + }); }); } diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 4552f918c60..6aec3e57fc5 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -100,17 +100,17 @@ void simple_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size 
(exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -147,22 +147,22 @@ void advanced_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index cd5298a4409..f561bf004c7 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -34,12 +34,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ +#include "core/matrix/batch_struct.hpp" + + #include #include #include "core/base/batch_struct.hpp" -#include "core/matrix/batch_struct.hpp" #include "dpcpp/base/config.hpp" diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index 25c73d45abc..c0659420661 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -30,8 +30,11 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#ifndef GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ -#define GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ +#ifndef GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ +#define GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ + + +#include "core/matrix/batch_struct.hpp" #include @@ -40,7 +43,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_struct.hpp" -#include "core/matrix/batch_struct.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/types.hip.hpp" @@ -93,4 +95,4 @@ get_batch_struct(batch::matrix::Dense* const op) } // namespace gko -#endif // GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ +#endif // GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 1a759cec2a9..47d48f1e927 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -34,13 +34,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ +#include "core/matrix/batch_struct.hpp" + + #include #include #include #include "core/base/batch_struct.hpp" -#include "core/matrix/batch_struct.hpp" namespace gko { From fd9a228435a7387f3537633531a7d6a846d04cc3 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 09:11:18 +0200 Subject: [PATCH 358/583] Use CommonTestFixture value_type --- test/matrix/batch_dense_kernels.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index b32f1063377..d6bf85a42c4 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -55,9 +55,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class Dense : public CommonTestFixture { protected: - using vtype = double; - using Mtx = gko::batch::matrix::Dense; - using MVec = gko::batch::MultiVector; + using Mtx = gko::batch::matrix::Dense; + using MVec = gko::batch::MultiVector; Dense() : rand_engine(15) {} @@ -87,7 +86,7 @@ class Dense : public CommonTestFixture { expected = MVec::create( ref, gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); - expected->fill(gko::one()); + expected->fill(gko::one()); dresult = gko::clone(exec, expected); } @@ -114,7 +113,7 @@ TEST_F(Dense, SingleVectorApplyIsEquivalentToRef) x->apply(y.get(), expected.get()); dx->apply(dy.get(), dresult.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } @@ -125,5 +124,5 @@ TEST_F(Dense, SingleVectorAdvancedApplyIsEquivalentToRef) x->apply(alpha.get(), y.get(), beta.get(), expected.get()); dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } From a71ce20655df85cedb0753e6161344ce0ff9675e Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 9 Oct 2023 12:48:24 +0200 Subject: [PATCH 359/583] Review updates Co-authored-by: Marcel Koch --- core/base/batch_multi_vector.cpp | 16 +++++++++--- core/base/batch_utilities.hpp | 7 +++--- core/matrix/batch_dense.cpp | 25 ------------------- .../test/preconditioner/jacobi_kernels.dp.cpp | 2 +- include/ginkgo/core/matrix/batch_dense.hpp | 2 +- reference/matrix/batch_dense_kernels.hpp.inc | 2 +- reference/test/matrix/batch_dense_kernels.cpp | 2 +- test/matrix/batch_dense_kernels.cpp | 2 +- 8 files changed, 20 insertions(+), 38 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 294fe45972a..bd2079907a3 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -294,11 +294,12 @@ void MultiVector::move_to( template void MultiVector::convert_to(matrix::Dense* result) const { - auto exec = result->get_executor() != nullptr ? result->get_executor() - : this->get_executor(); + auto exec = result->get_executor() == nullptr ? this->get_executor() + : result->get_executor(); auto tmp = gko::batch::matrix::Dense::create_const( exec, this->get_size(), - make_const_array_view(exec, this->get_num_stored_elements(), + make_const_array_view(this->get_executor(), + this->get_num_stored_elements(), this->get_const_values())); result->copy_from(tmp); } @@ -307,7 +308,14 @@ void MultiVector::convert_to(matrix::Dense* result) const template void MultiVector::move_to(matrix::Dense* result) { - this->convert_to(result); + auto exec = result->get_executor() == nullptr ? 
this->get_executor() + : result->get_executor(); + auto tmp = gko::batch::matrix::Dense::create_const( + exec, this->get_size(), + make_const_array_view(this->get_executor(), + this->get_num_stored_elements(), + this->get_const_values())); + tmp->move_to(result); } diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index d5c5bdb4aa2..834e89c8358 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -109,14 +109,13 @@ std::unique_ptr create_from_item( template -auto unbatch(const InputType* batch_multivec) +auto unbatch(const InputType* batch_object) { - auto exec = batch_multivec->get_executor(); auto unbatched_mats = std::vector>{}; - for (size_type b = 0; b < batch_multivec->get_num_batch_items(); ++b) { + for (size_type b = 0; b < batch_object->get_num_batch_items(); ++b) { unbatched_mats.emplace_back( - batch_multivec->create_const_view_for_item(b)->clone()); + batch_object->create_const_view_for_item(b)->clone()); } return unbatched_mats; } diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index a864b4114c2..b948a2c3afc 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -64,24 +64,6 @@ GKO_REGISTER_OPERATION(advanced_apply, batch_dense::advanced_apply); } // namespace dense -namespace detail { - - -template -batch_dim<2> compute_batch_size( - const std::vector*>& matrices) -{ - auto common_size = matrices[0]->get_size(); - for (size_type i = 1; i < matrices.size(); ++i) { - GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); - } - return batch_dim<2>{matrices.size(), common_size}; -} - - -} // namespace detail - - template std::unique_ptr> Dense::create_multi_vector_view() @@ -178,13 +160,6 @@ std::unique_ptr> Dense::create_const( } -inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) -{ - return batch_dim<2>(sizes.get_num_batch_items(), - dim<2>(1, sizes.get_common_size()[1])); -} - - template Dense::Dense(std::shared_ptr exec, const batch_dim<2>& size) diff --git a/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp b/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp index aae15245357..b8082a2db32 100644 --- a/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp +++ b/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp @@ -90,7 +90,7 @@ class Jacobi : public ::testing::Test { gko::uint32 max_block_size, int min_nnz, int max_nnz, int num_rhs = 1, value_type accuracy = 0.1, bool skip_sorting = true) { - std::ranlux48 engine(42); + std::default_random_engine engine(42); const auto dim = *(end(block_pointers) - 1); if (condition_numbers.size() == 0) { mtx = gko::test::generate_random_matrix( diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index d713760947e..d081e5d440e 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -62,7 +62,7 @@ namespace matrix { * belonging to the same row appear consecutive in the memory and the values of * each batch item are also stored consecutively in memory). * - * @note Though the storage layout is similar to the multi-vector object, the + * @note Though the storage layout is the same as the multi-vector object, the * class semantics and the operations it aims to provide is different. Hence it * is recommended to create multi-vector objects if the user means to view the * data as a set of vectors. 
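// Conversion sketch for the MultiVector -> batch::matrix::Dense path updated
// above: convert_to() produces a Dense matrix with the same batch sizes and a
// copy of the values.  Assumes an installed Ginkgo; the values and the
// function name are illustrative.
#include <ginkgo/ginkgo.hpp>

void multi_vector_to_dense_sketch()
{
    auto exec = gko::ReferenceExecutor::create();
    using MVec = gko::batch::MultiVector<double>;
    using Mtx = gko::batch::matrix::Dense<double>;
    auto mvec = gko::batch::initialize<MVec>(
        {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}},
         {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}},
        exec);
    auto mat = Mtx::create(exec);
    mvec->convert_to(mat.get());
    // mat->get_common_size() == gko::dim<2>(2, 3) and its values match mvec.
}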
diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc index bff9ad137cf..20e395af5b7 100644 --- a/reference/matrix/batch_dense_kernels.hpp.inc +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -71,7 +71,7 @@ inline void advanced_apply_kernel( } else { for (int row = 0; row < c.num_rows; ++row) { for (int col = 0; col < c.num_rhs; ++col) { - c.values[row * c.stride + col] *= gko::zero(); + c.values[row * c.stride + col] = gko::zero(); } } } diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp index e1689352cde..97dbe3e77cb 100644 --- a/reference/test/matrix/batch_dense_kernels.cpp +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -107,7 +107,7 @@ class Dense : public ::testing::Test { std::unique_ptr x_00; std::unique_ptr x_01; - std::ranlux48 rand_engine; + std::default_random_engine rand_engine; }; diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index d6bf85a42c4..a73efcd8753 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -90,7 +90,7 @@ class Dense : public CommonTestFixture { dresult = gko::clone(exec, expected); } - std::ranlux48 rand_engine; + std::default_random_engine rand_engine; const size_t batch_size = 11; std::unique_ptr x; From 2e8e600a047671bd339d6dbe1b43082cbbe470be Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 9 Oct 2023 16:01:46 +0200 Subject: [PATCH 360/583] Review updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Thomas Grützmacher --- .../matrix/batch_dense_kernels.hpp.inc | 18 ++--- core/base/batch_multi_vector.cpp | 9 +-- core/base/batch_struct.hpp | 16 ++-- core/matrix/batch_struct.hpp | 37 +++++----- cuda/base/batch_struct.hpp | 14 ++-- cuda/matrix/batch_struct.hpp | 18 ++--- dpcpp/base/batch_struct.hpp | 14 ++-- dpcpp/matrix/batch_dense_kernels.hpp.inc | 4 +- dpcpp/matrix/batch_struct.hpp | 22 +++--- hip/base/batch_struct.hip.hpp | 14 ++-- hip/matrix/batch_struct.hip.hpp | 18 ++--- include/ginkgo/core/matrix/batch_dense.hpp | 14 ++-- reference/base/batch_struct.hpp | 12 +-- reference/matrix/batch_dense_kernels.hpp.inc | 4 +- reference/matrix/batch_struct.hpp | 19 ++--- .../test/base/batch_multi_vector_kernels.cpp | 14 ++-- reference/test/matrix/batch_dense_kernels.cpp | 74 +++++++++---------- test/matrix/batch_dense_kernels.cpp | 44 +++++------ 18 files changed, 178 insertions(+), 187 deletions(-) diff --git a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc index 2f876332ae7..7a38cfea215 100644 --- a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
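// Sketch of a likely motivation for assigning zero instead of scaling by zero
// in the beta == 0 branch of the reference kernel changed above (an assumption
// about intent, illustrated with plain C++): multiplying a NaN entry of x by
// zero keeps it NaN, while the advanced apply should produce x = alpha * A * b
// whenever beta is zero.
#include <limits>

void beta_zero_sketch()
{
    const double nan = std::numeric_limits<double>::quiet_NaN();
    const double scaled = nan * 0.0;  // still NaN
    const double assigned = 0.0;      // well defined regardless of the old x
    (void)scaled;
    (void)assigned;
}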
template __device__ __forceinline__ void simple_apply( - const gko::batch::matrix::batch_dense::batch_item& mat, + const gko::batch::matrix::dense::batch_item& mat, const ValueType* const __restrict__ b, ValueType* const __restrict__ x) { constexpr auto tile_size = config::warp_size; @@ -65,10 +65,9 @@ template __global__ __launch_bounds__( default_block_size, sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: - batch_dense:: - uniform_batch< - const ValueType> - mat, + dense::uniform_batch< + const ValueType> + mat, const gko::batch:: multi_vector:: uniform_batch< @@ -94,7 +93,7 @@ __global__ __launch_bounds__( template __device__ __forceinline__ void advanced_apply( const ValueType alpha, - const gko::batch::matrix::batch_dense::batch_item& mat, + const gko::batch::matrix::dense::batch_item& mat, const ValueType* const __restrict__ b, const ValueType beta, ValueType* const __restrict__ x) { @@ -132,10 +131,9 @@ __global__ __launch_bounds__( const ValueType> alpha, const gko::batch::matrix:: - batch_dense:: - uniform_batch< - const ValueType> - mat, + dense::uniform_batch< + const ValueType> + mat, const gko::batch:: multi_vector:: uniform_batch< diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index bd2079907a3..6a14919bf2f 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -308,14 +308,7 @@ void MultiVector::convert_to(matrix::Dense* result) const template void MultiVector::move_to(matrix::Dense* result) { - auto exec = result->get_executor() == nullptr ? this->get_executor() - : result->get_executor(); - auto tmp = gko::batch::matrix::Dense::create_const( - exec, this->get_size(), - make_const_array_view(this->get_executor(), - this->get_num_stored_elements(), - this->get_const_values())); - tmp->move_to(result); + this->convert_to(result); } diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index caca4577cf7..71445550b87 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -51,9 +51,9 @@ template struct batch_item { using value_type = ValueType; ValueType* values; - int stride; - int num_rows; - int num_rhs; + int32 stride; + int32 num_rows; + int32 num_rhs; }; @@ -67,9 +67,9 @@ struct uniform_batch { ValueType* values; size_type num_batch_items; - int stride; - int num_rows; - int num_rhs; + int32 stride; + int32 num_rows; + int32 num_rhs; size_type get_entry_storage() const { @@ -117,8 +117,8 @@ extract_batch_item(const multi_vector::uniform_batch& batch, template GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item -extract_batch_item(ValueType* const batch_values, const int stride, - const int num_rows, const int num_rhs, +extract_batch_item(ValueType* const batch_values, const int32 stride, + const int32 num_rows, const int32 num_rhs, const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index 93b2b027ceb..0bbfde40cc9 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
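// Pointer-arithmetic sketch matching extract_batch_item() above: batch item k
// of a uniform batch starts at values + k * stride * num_rows.  The buffer and
// sizes below are illustrative.
#include <ginkgo/ginkgo.hpp>

void extract_batch_item_sketch()
{
    const gko::int32 stride = 3;  // equals the number of columns here
    const gko::int32 num_rows = 2;
    const gko::size_type batch_idx = 1;
    double values[12] = {};  // two contiguous 2x3 items
    double* item_values = values + batch_idx * stride * num_rows;  // values + 6
    (void)item_values;
}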
namespace gko { namespace batch { namespace matrix { -namespace batch_dense { +namespace dense { /** @@ -51,10 +51,10 @@ namespace batch_dense { template struct batch_item { using value_type = ValueType; - ValueType* values; - int stride; - int num_rows; - int num_cols; + value_type* values; + int32 stride; + int32 num_rows; + int32 num_cols; }; @@ -68,9 +68,9 @@ struct uniform_batch { ValueType* values; size_type num_batch_items; - int stride; - int num_rows; - int num_cols; + int32 stride; + int32 num_rows; + int32 num_cols; size_type get_entry_storage() const { @@ -79,38 +79,37 @@ struct uniform_batch { }; -} // namespace batch_dense +} // namespace dense template -GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item to_const( - const batch_dense::batch_item& b) +GKO_ATTRIBUTES GKO_INLINE dense::batch_item to_const( + const dense::batch_item& b) { return {b.values, b.stride, b.num_rows, b.num_cols}; } template -GKO_ATTRIBUTES GKO_INLINE batch_dense::uniform_batch to_const( - const batch_dense::uniform_batch& ub) +GKO_ATTRIBUTES GKO_INLINE dense::uniform_batch to_const( + const dense::uniform_batch& ub) { return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_cols}; } template -GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( - const batch_dense::uniform_batch& batch, - const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( + const dense::uniform_batch& batch, const size_type batch_idx) { return {batch.values + batch_idx * batch.stride * batch.num_rows, batch.stride, batch.num_rows, batch.num_cols}; } template -GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( - ValueType* const batch_values, const int stride, const int num_rows, - const int num_cols, const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( + ValueType* const batch_values, const int32 stride, const int32 num_rows, + const int32 num_cols, const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, num_cols}; diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 715332418fb..12f34509275 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -54,7 +54,7 @@ namespace cuda { * while also shallow-casting to the required CUDA scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. 
*/ @@ -66,9 +66,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct(const batch::MultiVector* const op) { return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } /** @@ -79,9 +79,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct(batch::MultiVector* const op) { return {as_cuda_type(op->get_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index f191953f7b9..8daf06f416c 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -58,7 +58,7 @@ namespace cuda { * while also shallow-casting to the required CUDA scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. */ @@ -66,13 +66,13 @@ namespace cuda { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline batch::matrix::batch_dense::uniform_batch> +inline batch::matrix::dense::uniform_batch> get_batch_struct(const batch::matrix::Dense* const op) { return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -80,13 +80,13 @@ get_batch_struct(const batch::matrix::Dense* const op) * Generates a uniform batch struct from a batch of dense matrices. */ template -inline batch::matrix::batch_dense::uniform_batch> +inline batch::matrix::dense::uniform_batch> get_batch_struct(batch::matrix::Dense* const op) { return {as_cuda_type(op->get_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 9c752a94b4f..2a0c03f552e 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -53,7 +53,7 @@ namespace dpcpp { * while also shallow-casting to the required DPCPP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. 
*/ @@ -65,9 +65,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( const batch::MultiVector* const op) { return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -79,9 +79,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( batch::MultiVector* const op) { return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/dpcpp/matrix/batch_dense_kernels.hpp.inc b/dpcpp/matrix/batch_dense_kernels.hpp.inc index dacd31feade..88ef5f54764 100644 --- a/dpcpp/matrix/batch_dense_kernels.hpp.inc +++ b/dpcpp/matrix/batch_dense_kernels.hpp.inc @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __dpct_inline__ void simple_apply_kernel( - const gko::batch::matrix::batch_dense::batch_item& mat, + const gko::batch::matrix::dense::batch_item& mat, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& x, sycl::nd_item<3>& item_ct1) @@ -66,7 +66,7 @@ __dpct_inline__ void simple_apply_kernel( template __dpct_inline__ void advanced_apply_kernel( const gko::batch::multi_vector::batch_item& alpha, - const gko::batch::matrix::batch_dense::batch_item& mat, + const gko::batch::matrix::dense::batch_item& mat, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& beta, const gko::batch::multi_vector::batch_item& x, diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index f561bf004c7..1955399d0d8 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -37,8 +37,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" -#include #include +#include #include "core/base/batch_struct.hpp" @@ -56,7 +56,7 @@ namespace dpcpp { * while also shallow-casting to the required DPCPP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. */ @@ -64,13 +64,13 @@ namespace dpcpp { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline batch::matrix::batch_dense::uniform_batch -get_batch_struct(const batch::matrix::Dense* const op) +inline batch::matrix::dense::uniform_batch get_batch_struct( + const batch::matrix::Dense* const op) { return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -78,13 +78,13 @@ get_batch_struct(const batch::matrix::Dense* const op) * Generates a uniform batch struct from a batch of dense matrices. 
*/ template -inline batch::matrix::batch_dense::uniform_batch get_batch_struct( +inline batch::matrix::dense::uniform_batch get_batch_struct( batch::matrix::Dense* const op) { return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index 442260e50e6..732c40662aa 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -54,7 +54,7 @@ namespace hip { * while also shallow-casting to the required Hip scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. */ @@ -66,9 +66,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct(const batch::MultiVector* const op) { return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } /** @@ -79,9 +79,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct( batch::MultiVector* const op) { return {as_hip_type(op->get_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index c0659420661..a22797a03d4 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -58,7 +58,7 @@ namespace hip { * while also shallow-casting to the required HIP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. */ @@ -66,13 +66,13 @@ namespace hip { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline batch::matrix::batch_dense::uniform_batch> +inline batch::matrix::dense::uniform_batch> get_batch_struct(const batch::matrix::Dense* const op) { return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -80,13 +80,13 @@ get_batch_struct(const batch::matrix::Dense* const op) * Generates a uniform batch struct from a batch of dense matrices. 
*/ template -inline batch::matrix::batch_dense::uniform_batch> +inline batch::matrix::dense::uniform_batch> get_batch_struct(batch::matrix::Dense* const op) { return {as_hip_type(op->get_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index d081e5d440e..932c52edfc5 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -63,7 +63,7 @@ namespace matrix { * each batch item are also stored consecutively in memory). * * @note Though the storage layout is the same as the multi-vector object, the - * class semantics and the operations it aims to provide is different. Hence it + * class semantics and the operations it aims to provide are different. Hence it * is recommended to create multi-vector objects if the user means to view the * data as a set of vectors. * @@ -123,13 +123,13 @@ class Dense final : public EnableBatchLinOp>, create_const_multi_vector_view() const; /** - * Creates a mutable view (of matrix::Dense type) of one item of the + * Creates a mutable view (of gko::matrix::Dense type) of one item of the * batch::matrix::Dense object. Does not perform any deep * copies, but only returns a view of the data. * * @param item_id The index of the batch item * - * @return a batch::matrix::Dense object with the data from the batch item + * @return a gko::matrix::Dense object with the data from the batch item * at the given index. */ std::unique_ptr create_view_for_item(size_type item_id); @@ -168,7 +168,7 @@ class Dense final : public EnableBatchLinOp>, * * @note the method has to be called on the same Executor the matrix is * stored at (e.g. trying to call this method on a GPU Dense object - * from the OMP results in a runtime error) + * from the OMP may result in incorrect behaviour) */ value_type& at(size_type batch_id, size_type row, size_type col) { @@ -197,7 +197,7 @@ class Dense final : public EnableBatchLinOp>, * * @note the method has to be called on the same Executor the matrix is * stored at (e.g. trying to call this method on a GPU Dense object - * from the OMP results in a runtime error) + * from the OMP may result in incorrect behaviour) */ ValueType& at(size_type batch_id, size_type idx) noexcept { @@ -268,7 +268,7 @@ class Dense final : public EnableBatchLinOp>, */ static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - gko::detail::const_array_view&& values); + detail::const_array_view&& values); /** * Apply the matrix to a multi-vector. Represents the matrix vector @@ -343,7 +343,7 @@ class Dense final : public EnableBatchLinOp>, } /** - * Creates a Dense matrix with the same configuration as the callers + * Creates a Dense matrix with the same configuration as the caller's * matrix. * * @returns a Dense matrix with the same configuration as the caller. 
diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index ce7c7af5605..0a3dbf37493 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -67,9 +67,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( const batch::MultiVector* const op) { return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -81,9 +81,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( batch::MultiVector* const op) { return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc index 20e395af5b7..17144267af1 100644 --- a/reference/matrix/batch_dense_kernels.hpp.inc +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template inline void simple_apply_kernel( - const gko::batch::matrix::batch_dense::batch_item& a, + const gko::batch::matrix::dense::batch_item& a, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& c) { @@ -57,7 +57,7 @@ inline void simple_apply_kernel( template inline void advanced_apply_kernel( const ValueType alpha, - const gko::batch::matrix::batch_dense::batch_item& a, + const gko::batch::matrix::dense::batch_item& a, const gko::batch::multi_vector::batch_item& b, const ValueType beta, const gko::batch::multi_vector::batch_item& c) diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 47d48f1e927..dcd4ce3e71e 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include @@ -67,13 +68,13 @@ namespace host { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline batch::matrix::batch_dense::uniform_batch -get_batch_struct(const batch::matrix::Dense* const op) +inline batch::matrix::dense::uniform_batch get_batch_struct( + const batch::matrix::Dense* const op) { return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -81,13 +82,13 @@ get_batch_struct(const batch::matrix::Dense* const op) * Generates a uniform batch struct from a batch of dense matrices. 
*/ template -inline batch::matrix::batch_dense::uniform_batch get_batch_struct( +inline batch::matrix::dense::uniform_batch get_batch_struct( batch::matrix::Dense* const op) { return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index e0c7643c8d7..a49168dc24e 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -140,9 +140,9 @@ TYPED_TEST(MultiVector, ScalesData) auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_0->scale(alpha.get()); + this->mtx_00->scale(ualpha[0].get()); this->mtx_01->scale(ualpha[1].get()); - auto res = gko::batch::unbatch>(this->mtx_0.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_00.get(), 0.); @@ -158,9 +158,9 @@ TYPED_TEST(MultiVector, ScalesDataWithScalar) auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->scale(alpha.get()); + this->mtx_10->scale(ualpha[0].get()); this->mtx_11->scale(ualpha[1].get()); - auto res = gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); @@ -196,9 +196,9 @@ TYPED_TEST(MultiVector, AddsScaled) auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); + this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - auto res = gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); @@ -214,9 +214,9 @@ TYPED_TEST(MultiVector, AddsScaledWithScalar) auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); + this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - auto res = gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); @@ -244,9 +244,9 @@ TYPED_TEST(MultiVector, ComputesDot) auto ures = gko::batch::unbatch>(result.get()); this->mtx_0->compute_dot(this->mtx_1.get(), result.get()); + this->mtx_00->compute_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_dot(this->mtx_11.get(), ures[1].get()); - auto res = gko::batch::unbatch>(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); @@ -256,6 +256,7 @@ TYPED_TEST(MultiVector, ComputesDot) TYPED_TEST(MultiVector, ComputeDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; + auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); @@ -285,9 +286,9 @@ TYPED_TEST(MultiVector, ComputesConjDot) auto ures = gko::batch::unbatch>(result.get()); this->mtx_0->compute_conj_dot(this->mtx_1.get(), result.get()); + this->mtx_00->compute_conj_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_conj_dot(this->mtx_11.get(), ures[1].get()); - auto res = gko::batch::unbatch>(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); @@ -297,6 +298,7 @@ TYPED_TEST(MultiVector, ComputesConjDot) TYPED_TEST(MultiVector, ComputeConjDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; + auto 
result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp index 97dbe3e77cb..a85453edee8 100644 --- a/reference/test/matrix/batch_dense_kernels.cpp +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -57,14 +57,12 @@ class Dense : public ::testing::Test { protected: using value_type = T; using size_type = gko::size_type; - using Mtx = gko::batch::matrix::Dense; - using MVec = gko::batch::MultiVector; + using BMtx = gko::batch::matrix::Dense; + using BMVec = gko::batch::MultiVector; using DenseMtx = gko::matrix::Dense; - using ComplexMtx = gko::to_complex; - using RealMtx = gko::remove_complex; Dense() : exec(gko::ReferenceExecutor::create()), - mtx_0(gko::batch::initialize( + mtx_0(gko::batch::initialize( {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, exec)), @@ -72,7 +70,7 @@ class Dense : public ::testing::Test { {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), mtx_01(gko::initialize( {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), - b_0(gko::batch::initialize( + b_0(gko::batch::initialize( {{I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), I({1.0, 0.0, 2.0})}, {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), @@ -86,7 +84,7 @@ class Dense : public ::testing::Test { {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), I({1.0, 0.0, 2.0})}, exec)), - x_0(gko::batch::initialize( + x_0(gko::batch::initialize( {{I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}}, exec)), @@ -97,13 +95,13 @@ class Dense : public ::testing::Test { {} std::shared_ptr exec; - std::unique_ptr mtx_0; + std::unique_ptr mtx_0; std::unique_ptr mtx_00; std::unique_ptr mtx_01; - std::unique_ptr b_0; + std::unique_ptr b_0; std::unique_ptr b_00; std::unique_ptr b_01; - std::unique_ptr x_0; + std::unique_ptr x_0; std::unique_ptr x_00; std::unique_ptr x_01; @@ -119,11 +117,10 @@ TYPED_TEST(Dense, AppliesToBatchMultiVector) using T = typename TestFixture::value_type; this->mtx_0->apply(this->b_0.get(), this->x_0.get()); + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); this->mtx_01->apply(this->b_01.get(), this->x_01.get()); - auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); } @@ -131,12 +128,12 @@ TYPED_TEST(Dense, AppliesToBatchMultiVector) TYPED_TEST(Dense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) { - using Mtx = typename TestFixture::Mtx; - using MVec = typename TestFixture::MVec; + using BMtx = typename TestFixture::BMtx; + using BMVec = typename TestFixture::BMVec; using DenseMtx = typename TestFixture::DenseMtx; using T = typename TestFixture::value_type; - auto alpha = gko::batch::initialize(2, {1.5}, this->exec); - auto beta = gko::batch::initialize(2, {-4.0}, this->exec); + auto alpha = gko::batch::initialize(2, {1.5}, this->exec); + auto beta = gko::batch::initialize(2, {-4.0}, this->exec); auto alpha0 = gko::initialize({1.5}, this->exec); auto alpha1 = gko::initialize({1.5}, this->exec); auto beta0 = gko::initialize({-4.0}, this->exec); @@ -144,13 +141,12 @@ TYPED_TEST(Dense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), this->x_00.get()); this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), this->x_01.get()); - auto res = 
gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); } @@ -158,12 +154,12 @@ TYPED_TEST(Dense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) { - using Mtx = typename TestFixture::Mtx; - using MVec = typename TestFixture::MVec; + using BMtx = typename TestFixture::BMtx; + using BMVec = typename TestFixture::BMVec; using DenseMtx = typename TestFixture::DenseMtx; using T = typename TestFixture::value_type; - auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); - auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); auto alpha0 = gko::initialize({1.5}, this->exec); auto alpha1 = gko::initialize({-1.0}, this->exec); auto beta0 = gko::initialize({2.5}, this->exec); @@ -171,13 +167,12 @@ TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), this->x_00.get()); this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), this->x_01.get()); - auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); } @@ -185,8 +180,9 @@ TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultCols) { - using MVec = typename TestFixture::MVec; - auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); + using BMVec = typename TestFixture::BMVec; + + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), gko::DimensionMismatch); @@ -195,8 +191,9 @@ TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultCols) TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultRows) { - using MVec = typename TestFixture::MVec; - auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); + using BMVec = typename TestFixture::BMVec; + + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), gko::DimensionMismatch); @@ -205,9 +202,10 @@ TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultRows) TYPED_TEST(Dense, ApplyFailsOnWrongInnerDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; + auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()), gko::DimensionMismatch); @@ -216,13 +214,13 @@ TYPED_TEST(Dense, ApplyFailsOnWrongInnerDimension) TYPED_TEST(Dense, AdvancedApplyFailsOnWrongInnerDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); auto alpha = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); auto beta = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, 
gko::dim<2>{1, 1}}); ASSERT_THROW( this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), @@ -232,13 +230,13 @@ TYPED_TEST(Dense, AdvancedApplyFailsOnWrongInnerDimension) TYPED_TEST(Dense, AdvancedApplyFailsOnWrongAlphaDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); auto alpha = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); auto beta = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); ASSERT_THROW( this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index a73efcd8753..119a868be09 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -55,17 +55,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. class Dense : public CommonTestFixture { protected: - using Mtx = gko::batch::matrix::Dense; - using MVec = gko::batch::MultiVector; + using BMtx = gko::batch::matrix::Dense; + using BMVec = gko::batch::MultiVector; Dense() : rand_engine(15) {} - template - std::unique_ptr gen_mtx(const gko::size_type num_batch_items, - gko::size_type num_rows, - gko::size_type num_cols) + template + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) { - return gko::test::generate_random_batch_matrix( + return gko::test::generate_random_batch_matrix( num_batch_items, num_rows, num_cols, std::uniform_int_distribution<>(num_cols, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); @@ -75,15 +75,15 @@ class Dense : public CommonTestFixture { { const int num_rows = 252; const int num_cols = 32; - x = gen_mtx(batch_size, num_rows, num_cols); - y = gen_mtx(batch_size, num_cols, num_vecs); - alpha = gen_mtx(batch_size, 1, 1); - beta = gen_mtx(batch_size, 1, 1); + x = gen_mtx(batch_size, num_rows, num_cols); + y = gen_mtx(batch_size, num_cols, num_vecs); + alpha = gen_mtx(batch_size, 1, 1); + beta = gen_mtx(batch_size, 1, 1); dx = gko::clone(exec, x); dy = gko::clone(exec, y); dalpha = gko::clone(exec, alpha); dbeta = gko::clone(exec, beta); - expected = MVec::create( + expected = BMVec::create( ref, gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); expected->fill(gko::one()); @@ -93,16 +93,16 @@ class Dense : public CommonTestFixture { std::default_random_engine rand_engine; const size_t batch_size = 11; - std::unique_ptr x; - std::unique_ptr y; - std::unique_ptr alpha; - std::unique_ptr beta; - std::unique_ptr expected; - std::unique_ptr dresult; - std::unique_ptr dx; - std::unique_ptr dy; - std::unique_ptr dalpha; - std::unique_ptr dbeta; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr dresult; + std::unique_ptr dx; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; }; From 5ac0ad8a4a0fc20b7565d7438871c82acc214874 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 9 Oct 2023 22:56:15 +0200 Subject: [PATCH 361/583] dpcpp Jacobi needs ranlux --- dpcpp/test/preconditioner/jacobi_kernels.dp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp b/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp index b8082a2db32..aae15245357 100644 --- a/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp +++ b/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp @@ -90,7 +90,7 @@ class Jacobi : public ::testing::Test { gko::uint32 max_block_size, int min_nnz, int max_nnz, int num_rhs = 1, value_type accuracy = 0.1, bool skip_sorting = true) { - std::default_random_engine engine(42); + std::ranlux48 engine(42); const auto dim = *(end(block_pointers) - 1); if (condition_numbers.size() == 0) { mtx = gko::test::generate_random_matrix( From 8ab42cb37f0a46f3400639787c377740266168be Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 9 Oct 2023 23:12:59 +0200 Subject: [PATCH 362/583] Remove create_multivector_view --- core/matrix/batch_dense.cpp | 32 ---------------------- core/test/matrix/batch_dense.cpp | 7 ----- include/ginkgo/core/matrix/batch_dense.hpp | 20 ++------------ 3 files changed, 2 insertions(+), 57 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index b948a2c3afc..da092a20229 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -64,38 +64,6 @@ GKO_REGISTER_OPERATION(advanced_apply, batch_dense::advanced_apply); } // namespace dense -template -std::unique_ptr> -Dense::create_multi_vector_view() -{ - auto exec = this->get_executor(); - auto num_batch_items = this->get_num_batch_items(); - auto num_rows = this->get_common_size()[0]; - auto stride = this->get_common_size()[1]; - auto mvec = MultiVector::create( - exec, this->get_size(), - make_array_view(exec, num_batch_items * num_rows * stride, - this->get_values())); - return mvec; -} - - -template -std::unique_ptr> -Dense::create_const_multi_vector_view() const -{ - auto exec = this->get_executor(); - auto num_batch_items = this->get_num_batch_items(); - auto num_rows = this->get_common_size()[0]; - auto stride = this->get_common_size()[1]; - auto mvec = MultiVector::create_const( - exec, this->get_size(), - make_const_array_view(exec, num_batch_items * num_rows * stride, - this->get_const_values())); - return mvec; -} - - template std::unique_ptr> Dense::create_view_for_item(size_type item_id) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 36fc3f2ee4a..316312bd68f 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -138,13 +138,6 @@ TYPED_TEST(Dense, CanCreateDenseItemView) } -TYPED_TEST(Dense, CanCreateMultiVectorView) -{ - GKO_ASSERT_BATCH_MTX_NEAR(this->mtx->create_multi_vector_view(), this->mvec, - 0.0); -} - - TYPED_TEST(Dense, CanBeCopied) { auto mtx_copy = gko::batch::matrix::Dense::create(this->exec); diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 932c52edfc5..50f8fe39727 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -106,22 +106,6 @@ class Dense final : public EnableBatchLinOp>, void move_to(Dense>* result) override; - /** - * Creates a mutable view (of MultiVector type) of the data owned by the - * matrix::Dense object. Does not perform any deep copies, but only - * returns a view of the underlying data. - * - * @return a MultiVector object with a view of the data from the batch - * dense matrix. 
- */ - std::unique_ptr> create_multi_vector_view(); - - /** - * @copydoc create_const_multi_vector_view() - */ - std::unique_ptr> - create_const_multi_vector_view() const; - /** * Creates a mutable view (of gko::matrix::Dense type) of one item of the * batch::matrix::Dense object. Does not perform any deep @@ -234,8 +218,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + From 00eba9845969f5cb363d6e22409bd7a73a1aa16f Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Mon, 9 Oct 2023 21:50:28 +0000 Subject: [PATCH 363/583] Format files Co-authored-by: Pratik Nayak --- include/ginkgo/core/matrix/batch_dense.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 50f8fe39727..2a33a0a8df3 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -218,8 +218,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + From ceb97e01f24a6e74aa732aa8bba34ec3ec71301f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 00:07:15 +0200 Subject: [PATCH 364/583] const_array_view needs to be in gko:: MSVC compiler fails lookup in gko::detail if there exists a gko::x::detail namespace --- include/ginkgo/core/matrix/batch_dense.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 2a33a0a8df3..89f12d69f62 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -252,7 +252,7 @@ class Dense final : public EnableBatchLinOp>, */ static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - detail::const_array_view&& values); + gko::detail::const_array_view&& values); /** * Apply the matrix to a multi-vector.
Represents the matrix vector From f6d4c4e3fee1ac4af0932a99cc4c01339571a84f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 16:15:32 +0200 Subject: [PATCH 365/583] Review updates Co-authored-by: Yu-Hsiang Tsai Co-authored-by: Marcel Koch --- core/matrix/batch_dense.cpp | 14 +---- core/test/base/batch_dim.cpp | 10 ---- core/test/matrix/batch_dense.cpp | 16 +++-- cuda/base/batch_struct.hpp | 2 +- cuda/matrix/batch_dense_kernels.cu | 1 + cuda/matrix/batch_struct.hpp | 2 +- dpcpp/base/batch_struct.hpp | 2 +- dpcpp/matrix/batch_dense_kernels.dp.cpp | 58 +++++++++---------- dpcpp/matrix/batch_struct.hpp | 2 +- hip/base/batch_struct.hip.hpp | 2 +- hip/matrix/batch_dense_kernels.hip.cpp | 2 + hip/matrix/batch_struct.hip.hpp | 2 +- include/ginkgo/core/base/batch_dim.hpp | 12 ---- .../ginkgo/core/base/batch_multi_vector.hpp | 24 ++++++-- include/ginkgo/core/matrix/batch_dense.hpp | 36 +++++++----- reference/test/matrix/batch_dense_kernels.cpp | 26 --------- 16 files changed, 89 insertions(+), 122 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index da092a20229..7675fcdde9c 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -100,19 +100,7 @@ template std::unique_ptr> Dense::create_with_config_of( ptr_param> other) { - // De-referencing `other` before calling the functions (instead of - // using operator `->`) is currently required to be compatible with - // CUDA 10.1. - // Otherwise, it results in a compile error. - return (*other).create_with_same_config(); -} - - -template -std::unique_ptr> Dense::create_with_same_config() - const -{ - return Dense::create(this->get_executor(), this->get_size()); + return Dense::create(other->get_executor(), other->get_size()); } diff --git a/core/test/base/batch_dim.cpp b/core/test/base/batch_dim.cpp index 7914eb4d15e..e8722530fba 100644 --- a/core/test/base/batch_dim.cpp +++ b/core/test/base/batch_dim.cpp @@ -85,16 +85,6 @@ TEST(BatchDim, NotEqualWorks) } -TEST(BatchDim, CanGetCumulativeOffsets) -{ - auto d = gko::batch_dim<2>(3, gko::dim<2>(4, 2)); - - ASSERT_EQ(d.get_cumulative_offset(0), 0); - ASSERT_EQ(d.get_cumulative_offset(1), 8); - ASSERT_EQ(d.get_cumulative_offset(2), 16); -} - - TEST(BatchDim, TransposesBatchDimensions) { ASSERT_EQ(gko::transpose(gko::batch_dim<2>(2, gko::dim<2>{4, 2})), diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 316312bd68f..7bde0c708dc 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -289,7 +289,7 @@ TYPED_TEST(Dense, CanBeConstructedFromDenseMatricesByDuplication) gko::batch::create_from_item>( this->exec, 3, mat1.get()); - GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 0); } @@ -316,7 +316,7 @@ TYPED_TEST(Dense, CanBeConstructedByDuplicatingDenseMatrices) auto m2 = gko::batch::duplicate>( this->exec, 3, m.get()); - GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 0); } @@ -384,13 +384,21 @@ TYPED_TEST(Dense, CanBeDoubleListConstructed) EXPECT_EQ(m->at(0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 1), value_type{1.0}); EXPECT_EQ(m->at(0, 2), value_type{0.0}); - ASSERT_EQ(m->at(0, 3), value_type{2.0}); + EXPECT_EQ(m->at(0, 3), value_type{2.0}); EXPECT_EQ(m->at(0, 4), value_type{4.0}); + EXPECT_EQ(m->at(0, 5), value_type{3.0}); + EXPECT_EQ(m->at(0, 6), value_type{3.0}); + EXPECT_EQ(m->at(0, 7), value_type{6.0}); + EXPECT_EQ(m->at(0, 8), value_type{1.0}); 
EXPECT_EQ(m->at(1, 0), value_type{1.0}); EXPECT_EQ(m->at(1, 1), value_type{2.0}); EXPECT_EQ(m->at(1, 2), value_type{-1.0}); - ASSERT_EQ(m->at(1, 3), value_type{3.0}); + EXPECT_EQ(m->at(1, 3), value_type{3.0}); EXPECT_EQ(m->at(1, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 5), value_type{-2.0}); + EXPECT_EQ(m->at(1, 6), value_type{5.0}); + EXPECT_EQ(m->at(1, 7), value_type{6.0}); + EXPECT_EQ(m->at(1, 8), value_type{-3.0}); } diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 12f34509275..14b300c9204 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -54,7 +54,7 @@ namespace cuda { * while also shallow-casting to the required CUDA scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. */ diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index 4f1dbc8f4d4..47c478864cf 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -77,6 +77,7 @@ constexpr int sm_oversubscription = 4; #include "common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc" + // clang-format on diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 8daf06f416c..2ae453b6e61 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -58,7 +58,7 @@ namespace cuda { * while also shallow-casting to the required CUDA scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. */ diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 2a0c03f552e..dc8301ecb2e 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -53,7 +53,7 @@ namespace dpcpp { * while also shallow-casting to the required DPCPP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. 
*/ diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 6aec3e57fc5..8fca47c27b8 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -98,19 +98,19 @@ void simple_apply(std::shared_ptr exec, } // Launch a kernel that has nbatches blocks, each block has max group size - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -145,24 +145,24 @@ void advanced_apply(std::shared_ptr exec, const dim3 grid(num_batch_items); // Launch a kernel that has nbatches blocks, each block has max group size - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index 1955399d0d8..d452f78644f 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -56,7 +56,7 @@ namespace dpcpp { * while also shallow-casting to the required DPCPP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. 
*/ diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index 732c40662aa..5747e202fb7 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -54,7 +54,7 @@ namespace hip { * while also shallow-casting to the required Hip scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. */ diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index aa6d717438e..a0fdea446be 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -79,8 +79,10 @@ constexpr int sm_oversubscription = 4; #include "common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc" + // clang-format on + } // namespace batch_dense } // namespace hip } // namespace kernels diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index a22797a03d4..c1bd6441367 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -58,7 +58,7 @@ namespace hip { * while also shallow-casting to the required HIP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. */ diff --git a/include/ginkgo/core/base/batch_dim.hpp b/include/ginkgo/core/base/batch_dim.hpp index 3bda352fb9d..e0ade2c872f 100644 --- a/include/ginkgo/core/base/batch_dim.hpp +++ b/include/ginkgo/core/base/batch_dim.hpp @@ -74,18 +74,6 @@ struct batch_dim { return common_size_; } - /** - * Get the cumulative storage size offset - * - * @param batch_id the batch id - * - * @return the cumulative offset - */ - size_type get_cumulative_offset(size_type batch_id) const - { - return batch_id * common_size_[0] * common_size_[1]; - } - /** * Checks if two batch_dim objects are equal. * diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 7830a4c6efb..61dffba3193 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -202,8 +202,7 @@ class MultiVector value_type* get_values_for_item(size_type batch_id) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return values_.get_data() + - this->get_size().get_cumulative_offset(batch_id); + return values_.get_data() + this->get_cumulative_offset(batch_id); } /** @@ -217,8 +216,7 @@ class MultiVector size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return values_.get_const_data() + - this->get_size().get_cumulative_offset(batch_id); + return values_.get_const_data() + this->get_cumulative_offset(batch_id); } /** @@ -233,6 +231,19 @@ class MultiVector return values_.get_num_elems(); } + /** + * Get the cumulative storage size offset + * + * @param batch_id the batch id + * + * @return the cumulative offset + */ + size_type get_cumulative_offset(size_type batch_id) const + { + return batch_id * this->get_common_size()[0] * + this->get_common_size()[1]; + } + /** * Returns a single element for a particular batch item. 
* @@ -375,7 +386,8 @@ class MultiVector private: inline size_type compute_num_elems(const batch_dim<2>& size) { - return size.get_cumulative_offset(size.get_num_batch_items()); + return size.get_num_batch_items() * size.get_common_size()[0] * + size.get_common_size()[1]; } protected: @@ -434,7 +446,7 @@ class MultiVector size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept { - return batch_size_.get_cumulative_offset(batch) + + return this->get_cumulative_offset(batch) + row * batch_size_.get_common_size()[1] + col; } diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 89f12d69f62..59ab92cd146 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -124,6 +124,19 @@ class Dense final : public EnableBatchLinOp>, std::unique_ptr create_const_view_for_item( size_type item_id) const; + /** + * Get the cumulative storage size offset + * + * @param batch_id the batch id + * + * @return the cumulative offset + */ + size_type get_cumulative_offset(size_type batch_id) const + { + return batch_id * this->get_common_size()[0] * + this->get_common_size()[1]; + } + /** * Returns a pointer to the array of values of the multi-vector * @@ -207,8 +220,7 @@ class Dense final : public EnableBatchLinOp>, value_type* get_values_for_item(size_type batch_id) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return values_.get_data() + - this->get_size().get_cumulative_offset(batch_id); + return values_.get_data() + this->get_cumulative_offset(batch_id); } /** @@ -222,8 +234,7 @@ class Dense final : public EnableBatchLinOp>, size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return values_.get_const_data() + - this->get_size().get_cumulative_offset(batch_id); + return values_.get_const_data() + this->get_cumulative_offset(batch_id); } /** @@ -269,8 +280,8 @@ class Dense final : public EnableBatchLinOp>, /** * Apply the matrix to a multi-vector with a linear combination of the given - * input vector. Represents the matrix vector multiplication, x = alpha* A * - * b + beta * x, where x and b are both multi-vectors. + * input vector. Represents the matrix vector multiplication, x = alpha * A + * * b + beta * x, where x and b are both multi-vectors. * * @param alpha the scalar to scale the matrix-vector product with * @param b the multi-vector to be applied to @@ -288,7 +299,8 @@ class Dense final : public EnableBatchLinOp>, private: inline size_type compute_num_elems(const batch_dim<2>& size) { - return size.get_cumulative_offset(size.get_num_batch_items()); + return size.get_num_batch_items() * size.get_common_size()[0] * + size.get_common_size()[1]; } protected: @@ -326,14 +338,6 @@ class Dense final : public EnableBatchLinOp>, GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); } - /** - * Creates a Dense matrix with the same configuration as the caller's - * matrix. - * - * @returns a Dense matrix with the same configuration as the caller. 
- */ - std::unique_ptr create_with_same_config() const; - void apply_impl(const MultiVector* b, MultiVector* x) const; @@ -345,7 +349,7 @@ class Dense final : public EnableBatchLinOp>, size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept { - return this->get_size().get_cumulative_offset(batch) + + return this->get_cumulative_offset(batch) + row * this->get_size().get_common_size()[1] + col; } diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp index a85453edee8..6a23374f7cb 100644 --- a/reference/test/matrix/batch_dense_kernels.cpp +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -126,32 +126,6 @@ TYPED_TEST(Dense, AppliesToBatchMultiVector) } -TYPED_TEST(Dense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) -{ - using BMtx = typename TestFixture::BMtx; - using BMVec = typename TestFixture::BMVec; - using DenseMtx = typename TestFixture::DenseMtx; - using T = typename TestFixture::value_type; - auto alpha = gko::batch::initialize(2, {1.5}, this->exec); - auto beta = gko::batch::initialize(2, {-4.0}, this->exec); - auto alpha0 = gko::initialize({1.5}, this->exec); - auto alpha1 = gko::initialize({1.5}, this->exec); - auto beta0 = gko::initialize({-4.0}, this->exec); - auto beta1 = gko::initialize({-4.0}, this->exec); - - this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), - this->x_0.get()); - - this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), - this->x_00.get()); - this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), - this->x_01.get()); - auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); -} - - TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) { using BMtx = typename TestFixture::BMtx; From a34315fe22a25709e2cd437b34c5958db9ea2d0d Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 16:35:46 +0200 Subject: [PATCH 366/583] Move apply validation to BatchLinOp --- core/matrix/batch_dense.cpp | 16 ++------- include/ginkgo/core/base/batch_lin_op.hpp | 40 +++++++++++++++++++++++ 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 7675fcdde9c..758635cea7f 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -128,12 +128,7 @@ template void Dense::apply_impl(const MultiVector* b, MultiVector* x) const { - GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); - GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + this->validate_application_parameters(b, x); this->get_executor()->run(dense::make_simple_apply(this, b, x)); } @@ -144,14 +139,7 @@ void Dense::apply_impl(const MultiVector* alpha, const MultiVector* beta, MultiVector* x) const { - GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); - GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), gko::dim<2>(1, 1)); - 
GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); + this->validate_application_parameters(alpha, b, beta, x); this->get_executor()->run( dense::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index 78ce4f4a942..a0efb2ea324 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -110,6 +111,45 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { */ const batch_dim<2>& get_size() const noexcept { return size_; } + /** + * Validates the sizes for the apply(b,x) operation in the + * concrete BatchLinOp. + * + */ + template + void validate_application_parameters(const MultiVector* b, + MultiVector* x) const + { + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + } + + /** + * Validates the sizes for the apply(alpha, b , beta, x) operation in the + * concrete BatchLinOp. + * + */ + template + void validate_application_parameters(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const + { + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), + gko::dim<2>(1, 1)); + GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); + } + protected: /** * Sets the size of the batch operator. 
From 2b65f1433eed447c944ec2e89d0db1eca5c3753a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 16:38:25 +0200 Subject: [PATCH 367/583] Add to test_install --- test/test_install/test_install.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index d442647a985..325773f0b75 100644 --- a/test/test_install/test_install.cpp +++ b/test/test_install/test_install.cpp @@ -219,6 +219,13 @@ int main() auto test = batch_multi_vector_type::create(exec); } + // core/base/batch_dense.hpp + { + using type1 = float; + using batch_dense_type = gko::batch::Dense; + auto test = batch_dense_type::create(exec); + } + // core/base/combination.hpp { using type1 = int; From 5928b9ff4041490539ce182ae96738902a12f53e Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Tue, 10 Oct 2023 14:46:12 +0000 Subject: [PATCH 368/583] Format files Co-authored-by: Pratik Nayak --- dpcpp/matrix/batch_dense_kernels.dp.cpp | 54 ++++++++++++------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 8fca47c27b8..a6fba2df8e3 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -100,17 +100,17 @@ void simple_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -147,22 +147,22 @@ void advanced_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = 
batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } From ed59e2fe570e5a0b44245d80ec6f6cbe1b62ae00 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 22:53:30 +0200 Subject: [PATCH 369/583] Review updates Co-authored-by: Terry Cojean --- core/matrix/batch_dense_kernels.hpp | 1 - core/test/matrix/batch_dense.cpp | 8 ++------ cuda/matrix/batch_dense_kernels.cu | 3 --- cuda/matrix/batch_struct.hpp | 3 --- dpcpp/matrix/batch_struct.hpp | 2 -- hip/matrix/batch_dense_kernels.hip.cpp | 3 --- hip/matrix/batch_struct.hip.hpp | 3 --- include/ginkgo/core/matrix/batch_dense.hpp | 3 +++ reference/matrix/batch_struct.hpp | 2 -- test/matrix/batch_dense_kernels.cpp | 16 ++++++++-------- test/test_install/test_install.cpp | 2 +- 11 files changed, 14 insertions(+), 32 deletions(-) diff --git a/core/matrix/batch_dense_kernels.hpp b/core/matrix/batch_dense_kernels.hpp index cb46b7291b8..ef59ff3e9cc 100644 --- a/core/matrix/batch_dense_kernels.hpp +++ b/core/matrix/batch_dense_kernels.hpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 7bde0c708dc..8e64c913a6a 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -256,7 +256,6 @@ TYPED_TEST(Dense, CanBeConstructedFromDenseMatrices) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, @@ -275,16 +274,15 @@ TYPED_TEST(Dense, CanBeConstructedFromDenseMatricesByDuplication) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize( 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = gko::batch::create_from_item>( this->exec, 3, mat1.get()); @@ -298,12 +296,10 @@ TYPED_TEST(Dense, CanBeConstructedByDuplicatingDenseMatrices) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat2.get()}); @@ -342,6 +338,7 @@ TYPED_TEST(Dense, CanBeUnbatchedIntoDenseMatrices) TYPED_TEST(Dense, CanBeListConstructed) { using value_type = typename TestFixture::value_type; + auto m = gko::batch::initialize>( {{1.0, 2.0}, {1.0, 3.0}}, this->exec); @@ -406,7 +403,6 @@ TYPED_TEST(Dense, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; using index_type = int; - auto vec_data = std::vector>{}; vec_data.emplace_back(gko::matrix_data( {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 0, 0.0}, {1, 1, 5.0}})); diff --git a/cuda/matrix/batch_dense_kernels.cu 
b/cuda/matrix/batch_dense_kernels.cu index 47c478864cf..dd82e15b8cc 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -34,7 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include @@ -44,8 +43,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" #include "cuda/base/batch_struct.hpp" #include "cuda/base/config.hpp" -#include "cuda/base/cublas_bindings.hpp" -#include "cuda/base/pointer_mode_guard.hpp" #include "cuda/base/thrust.cuh" #include "cuda/components/cooperative_groups.cuh" #include "cuda/components/reduction.cuh" diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 2ae453b6e61..73712a7b81b 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -37,13 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" -#include -#include #include #include "core/base/batch_struct.hpp" -#include "cuda/base/config.hpp" #include "cuda/base/types.hpp" diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index d452f78644f..b0393daf55d 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -37,12 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" -#include #include #include "core/base/batch_struct.hpp" -#include "dpcpp/base/config.hpp" namespace gko { diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index a0fdea446be..eb3da83760a 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include #include @@ -46,8 +45,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" #include "hip/base/batch_struct.hip.hpp" #include "hip/base/config.hip.hpp" -#include "hip/base/hipblas_bindings.hip.hpp" -#include "hip/base/pointer_mode_guard.hip.hpp" #include "hip/base/thrust.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" #include "hip/components/reduction.hip.hpp" diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index c1bd6441367..4670cf0988b 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -37,13 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/batch_struct.hpp" -#include -#include #include #include "core/base/batch_struct.hpp" -#include "hip/base/config.hip.hpp" #include "hip/base/types.hip.hpp" diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 59ab92cd146..7f3ce5890e4 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -133,6 +133,7 @@ class Dense final : public EnableBatchLinOp>, */ size_type get_cumulative_offset(size_type batch_id) const { + GKO_ASSERT(batch_id < this->get_num_batch_items()); return batch_id * this->get_common_size()[0] * this->get_common_size()[1]; } @@ -198,6 +199,7 @@ class Dense final : public EnableBatchLinOp>, */ ValueType& at(size_type batch_id, size_type idx) noexcept { + GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_data()[linearize_index(batch_id, idx)]; } @@ -206,6 +208,7 @@ class Dense final : public EnableBatchLinOp>, */ ValueType at(size_type batch_id, size_type idx) const noexcept { + GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data()[linearize_index(batch_id, idx)]; } diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index dcd4ce3e71e..483d7717718 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -37,8 +37,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" -#include -#include #include #include diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index 119a868be09..a243d51f3c1 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -75,11 +75,11 @@ class Dense : public CommonTestFixture { { const int num_rows = 252; const int num_cols = 32; - x = gen_mtx(batch_size, num_rows, num_cols); + mat = gen_mtx(batch_size, num_rows, num_cols); y = gen_mtx(batch_size, num_cols, num_vecs); alpha = gen_mtx(batch_size, 1, 1); beta = gen_mtx(batch_size, 1, 1); - dx = gko::clone(exec, x); + dmat = gko::clone(exec, mat); dy = gko::clone(exec, y); dalpha = gko::clone(exec, alpha); dbeta = gko::clone(exec, beta); @@ -93,13 +93,13 @@ class Dense : public CommonTestFixture { std::default_random_engine rand_engine; const size_t batch_size = 11; - std::unique_ptr x; + std::unique_ptr mat; std::unique_ptr y; std::unique_ptr alpha; std::unique_ptr beta; std::unique_ptr expected; std::unique_ptr dresult; - std::unique_ptr dx; + std::unique_ptr dmat; std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; @@ -110,8 +110,8 @@ TEST_F(Dense, SingleVectorApplyIsEquivalentToRef) { set_up_apply_data(1); - x->apply(y.get(), expected.get()); - dx->apply(dy.get(), dresult.get()); + mat->apply(y.get(), expected.get()); + dmat->apply(dy.get(), dresult.get()); GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } @@ -121,8 +121,8 @@ TEST_F(Dense, SingleVectorAdvancedApplyIsEquivalentToRef) { set_up_apply_data(1); - x->apply(alpha.get(), y.get(), beta.get(), expected.get()); - dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + mat->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmat->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index 325773f0b75..7e53ea8f165 100644 --- a/test/test_install/test_install.cpp +++ 
b/test/test_install/test_install.cpp @@ -222,7 +222,7 @@ int main() // core/base/batch_dense.hpp { using type1 = float; - using batch_dense_type = gko::batch::Dense; + using batch_dense_type = gko::batch::matrix::Dense; auto test = batch_dense_type::create(exec); } From 17d54f3771b45c7991f0c3d57d4ebd34c17e6b72 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Thu, 14 Sep 2023 15:08:47 +0200 Subject: [PATCH 370/583] destroy rand_generator --- cuda/base/curand_bindings.hpp | 6 ++++++ cuda/solver/idr_kernels.cu | 1 + hip/base/hiprand_bindings.hip.hpp | 5 +++++ hip/solver/idr_kernels.hip.cpp | 1 + 4 files changed, 13 insertions(+) diff --git a/cuda/base/curand_bindings.hpp b/cuda/base/curand_bindings.hpp index 429481ec9b6..4bf12dd9064 100644 --- a/cuda/base/curand_bindings.hpp +++ b/cuda/base/curand_bindings.hpp @@ -83,6 +83,12 @@ inline curandGenerator_t rand_generator(int64 seed, } +inline void destroy(curandGenerator_t gen) +{ + GKO_ASSERT_NO_CURAND_ERRORS(curandDestroyGenerator(gen)); +} + + #define GKO_BIND_CURAND_RANDOM_VECTOR(ValueType, CurandName) \ inline void rand_vector( \ curandGenerator_t& gen, int n, remove_complex mean, \ diff --git a/cuda/solver/idr_kernels.cu b/cuda/solver/idr_kernels.cu index 10e8a7b2fc3..7bfe56987f4 100644 --- a/cuda/solver/idr_kernels.cu +++ b/cuda/solver/idr_kernels.cu @@ -104,6 +104,7 @@ void initialize_subspace_vectors(std::shared_ptr exec, gen, subspace_vectors->get_size()[0] * subspace_vectors->get_stride(), 0.0, 1.0, subspace_vectors->get_values()); + curand::destroy(gen); } } diff --git a/hip/base/hiprand_bindings.hip.hpp b/hip/base/hiprand_bindings.hip.hpp index 14e144f6d84..dfef3bb84b4 100644 --- a/hip/base/hiprand_bindings.hip.hpp +++ b/hip/base/hiprand_bindings.hip.hpp @@ -87,6 +87,11 @@ inline hiprandGenerator_t rand_generator(int64 seed, return gen; } +inline void destroy(hiprandGenerator_t gen) +{ + GKO_ASSERT_NO_HIPRAND_ERRORS(hiprandDestroyGenerator(gen)); +} + #define GKO_BIND_HIPRAND_RANDOM_VECTOR(ValueType, HiprandName) \ inline void rand_vector( \ diff --git a/hip/solver/idr_kernels.hip.cpp b/hip/solver/idr_kernels.hip.cpp index 9e6f353abe4..1a3d2931897 100644 --- a/hip/solver/idr_kernels.hip.cpp +++ b/hip/solver/idr_kernels.hip.cpp @@ -106,6 +106,7 @@ void initialize_subspace_vectors(std::shared_ptr exec, gen, subspace_vectors->get_size()[0] * subspace_vectors->get_stride(), 0.0, 1.0, subspace_vectors->get_values()); + hiprand::destroy(gen); } } From c2649ded0206f072cfd9fa7688d7259d4a38241e Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. 
Tsai" <19565938+yhmtsai@users.noreply.github.com> Date: Fri, 13 Oct 2023 00:42:58 +0200 Subject: [PATCH 371/583] Fix PAPI segmentation fault (#1419) This PR fix the PAPI segmentation fault Related PR: https://github.com/ginkgo-project/ginkgo/pull/1419 --- include/ginkgo/core/log/papi.hpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/include/ginkgo/core/log/papi.hpp b/include/ginkgo/core/log/papi.hpp index bf22f7c876f..2b2a3326cce 100644 --- a/include/ginkgo/core/log/papi.hpp +++ b/include/ginkgo/core/log/papi.hpp @@ -208,10 +208,7 @@ class Papi : public Logger { create(std::shared_ptr, const Logger::mask_type& enabled_events = Logger::all_events_mask) { - return std::shared_ptr(new Papi(enabled_events), [](auto logger) { - papi_sde_shutdown(logger->get_handle()); - delete logger; - }); + return Papi::create(enabled_events); } /** @@ -223,8 +220,9 @@ class Papi : public Logger { const Logger::mask_type& enabled_events = Logger::all_events_mask) { return std::shared_ptr(new Papi(enabled_events), [](auto logger) { - papi_sde_shutdown(logger->get_handle()); + auto handle = logger->get_handle(); delete logger; + papi_sde_shutdown(handle); }); } From 74997fc2f2883c8ab88b425f67aa404611cc10ac Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Thu, 12 Oct 2023 11:09:25 -0700 Subject: [PATCH 372/583] Update tau.cpp Fixing order of includes. The ifdef check has to happen after config.h has been included, or else the perfstubs header file won't get included. Also, adding an argument name for the `end_tau()` function. --- core/log/tau.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/core/log/tau.cpp b/core/log/tau.cpp index 62b68732de1..5db95375da9 100644 --- a/core/log/tau.cpp +++ b/core/log/tau.cpp @@ -30,16 +30,14 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include +#include + #if GKO_HAVE_TAU #define PERFSTUBS_USE_TIMERS #include #endif - -#include -#include - - namespace gko { namespace log { @@ -56,7 +54,7 @@ void begin_tau(const char* name, profile_event_category) } -void end_tau(const char*, profile_event_category) +void end_tau(const char* name, profile_event_category) { PERFSTUBS_STOP_STRING(name); } From 1ca0debdaaec04ee635c86e5f198d40cf1ed1f12 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 13 Oct 2023 11:25:07 +0200 Subject: [PATCH 373/583] review updates Co-authored-by: Yuhsiang M. Tsai --- core/log/tau.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/core/log/tau.cpp b/core/log/tau.cpp index 5db95375da9..e1b29c9c953 100644 --- a/core/log/tau.cpp +++ b/core/log/tau.cpp @@ -30,6 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ +#include #include #include From fe60c741b1aa818cca1894a72b48cbc5d5a96af6 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. 
Tsai" Date: Wed, 14 Jun 2023 16:19:54 +0200 Subject: [PATCH 374/583] add icpx support --- .github/workflows/intel.yml | 6 ++++-- CMakeLists.txt | 4 +++- README.md | 2 +- benchmark/CMakeLists.txt | 2 ++ cmake/autodetect_executors.cmake | 4 ++++ cmake/build_helpers.cmake | 7 ++++++- cmake/create_test.cmake | 2 ++ dpcpp/CMakeLists.txt | 2 ++ test/solver/CMakeLists.txt | 3 +++ third_party/gtest/CMakeLists.txt | 2 +- 10 files changed, 28 insertions(+), 6 deletions(-) diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 9fd85708737..4652b3996e1 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -6,6 +6,7 @@ on: - 'master' - 'develop' - 'release/**' + - 'icpx_compilation' tags: - '**' pull_request: @@ -21,7 +22,8 @@ jobs: fail-fast: false matrix: config: - - {build_type: "Release", name: "intel/release/shared", "mixed": "ON"} + - {compiler: "dpcpp", build_type: "Release", name: "intel/dpcpp/release/shared", mixed: "ON"} + - {compiler: "icpx", build_type: "Release", name: "intel/icpx/release/shared", mixed: "OFF"} name: ${{ matrix.config.name }} runs-on: [gpu_intel] @@ -35,7 +37,7 @@ jobs: spack find --loaded mkdir build cd build - cmake .. -DCMAKE_INSTALL_PREFIX=install_ginkgo -DGINKGO_COMPILER_FLAGS="-ffp-model=precise" -DCMAKE_CXX_COMPILER=dpcpp -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }} -DGINKGO_DPCPP_SINGLE_MODE=ON + cmake .. -DCMAKE_INSTALL_PREFIX=install_ginkgo -DGINKGO_COMPILER_FLAGS="-ffp-model=precise" -DCMAKE_CXX_COMPILER=${{ matrix.config.compiler }} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }} -DGINKGO_DPCPP_SINGLE_MODE=ON make -j8 ONEAPI_DEVICE_SELECTOR=level_zero:gpu ctest -j10 --output-on-failure diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dbce4a29c6..5dff9bcbaac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ set(GINKGO_VERBOSE_LEVEL "1" CACHE STRING if(MSVC) set(GINKGO_COMPILER_FLAGS "" CACHE STRING "Set the required CXX compiler flags, mainly used for warnings. Current default is ``") -elseif(GINKGO_BUILD_DPCPP OR CMAKE_CXX_COMPILER MATCHES "dpcpp") +elseif(GINKGO_BUILD_DPCPP OR CMAKE_CXX_COMPILER MATCHES "dpcpp|icpx") # For now always use `-ffp-model=precise` with DPC++. This can be removed when # the floating point issues are fixed. 
set(GINKGO_COMPILER_FLAGS "-Wpedantic;-ffp-model=precise" CACHE STRING @@ -298,6 +298,8 @@ endif() if(GINKGO_BUILD_DPCPP) ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_MAJOR_VERSION __LIBSYCL_MAJOR_VERSION) ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_VERSION __SYCL_COMPILER_VERSION) + get_filename_component(GINKGO_SYCL_DIR ${CMAKE_CXX_COMPILER} DIRECTORY) + set(SYCL_INCLUDE_PATH "${GINKGO_SYCL_DIR}/../include;${GINKGO_SYCL_DIR}/../include/sycl") else() set(GINKGO_DPCPP_MAJOR_VERSION "0") endif() diff --git a/README.md b/README.md index 44428386b83..102005e4a18 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ The Ginkgo HIP module has the following __additional__ requirements: The Ginkgo DPC++ module has the following __additional__ requirements: * _OneAPI 2021.3+_ -* Set `dpcpp` as the `CMAKE_CXX_COMPILER` +* Set `dpcpp` or `icpx` as the `CMAKE_CXX_COMPILER` * `c++17` is used to compile Ginkgo * The following oneAPI packages should be available: * oneMKL diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index fd04620f595..7a4f5b1ca43 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -140,6 +140,8 @@ if (GINKGO_BUILD_DPCPP) ginkgo_benchmark_onemkl_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) ginkgo_benchmark_onemkl_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) add_library(dpcpp_timer utils/dpcpp_timer.dp.cpp) + target_compile_options(dpcpp_timer PRIVATE ${GINKGO_DPCPP_FLAGS}) + target_link_options(dpcpp_timer PRIVATE ${GINKGO_DPCPP_FLAGS}) target_link_libraries(dpcpp_timer ginkgo) endif() diff --git a/cmake/autodetect_executors.cmake b/cmake/autodetect_executors.cmake index 315e0eb3e38..86949cecda9 100644 --- a/cmake/autodetect_executors.cmake +++ b/cmake/autodetect_executors.cmake @@ -40,6 +40,10 @@ endif() if (NOT DEFINED GINKGO_BUILD_DPCPP) try_compile(GKO_CAN_COMPILE_DPCPP ${PROJECT_BINARY_DIR}/dpcpp SOURCES ${PROJECT_SOURCE_DIR}/dpcpp/test_dpcpp.dp.cpp + # try_compile will pass the project CMAKE_CXX_FLAGS so passing -DCMAKE_CXX_FLAGS does not affect it. + # They append COMPILE_DEFINITIONS into CMAKE_CXX_FLAGS. + # Note. 
it is different from try_compile COMPILE_DEFINITIONS affect + CMAKE_FLAGS -DCOMPILE_DEFINITIONS=-fsycl CXX_STANDARD 17) if (GKO_CAN_COMPILE_DPCPP) message(STATUS "Enabling DPCPP executor") diff --git a/cmake/build_helpers.cmake b/cmake/build_helpers.cmake index a7b8c48acf3..25add05c60f 100644 --- a/cmake/build_helpers.cmake +++ b/cmake/build_helpers.cmake @@ -9,6 +9,10 @@ function(ginkgo_default_includes name) $ $ ) + if(DEFINED SYCL_INCLUDE_PATH) + # avoid -fsycl in all place + target_include_directories("${name}" PUBLIC ${SYCL_INCLUDE_PATH}) + endif() if(GINKGO_HAVE_HWLOC) target_include_directories("${name}" PUBLIC @@ -139,7 +143,8 @@ function(ginkgo_extract_dpcpp_version DPCPP_COMPILER GINKGO_DPCPP_VERSION MACRO_ "int main() {std::cout << ${MACRO_VAR} << '\\n'\;" "return 0\;}") file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver.cpp" ${DPCPP_VERSION_PROG}) - execute_process(COMMAND ${DPCPP_COMPILER} ${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver.cpp + # we always add -fsycl + execute_process(COMMAND ${DPCPP_COMPILER} -fsycl ${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver ERROR_VARIABLE DPCPP_EXTRACT_VER_ERROR) execute_process(COMMAND ${CMAKE_CURRENT_BINARY_DIR}/extract_dpcpp_ver diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index cec47fced74..55b70bbaeaa 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -124,6 +124,7 @@ function(ginkgo_create_dpcpp_test test_name) add_executable(${test_target_name} ${test_name}.dp.cpp) target_compile_features(${test_target_name} PUBLIC cxx_std_17) target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS}) + target_link_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS}) target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) ginkgo_set_test_target_properties(${test_target_name} "_dpcpp" ${ARGN}) ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE sycl) @@ -298,6 +299,7 @@ function(ginkgo_create_common_device_test test_name) ginkgo_create_common_test_internal(${test_name} DpcppExecutor dpcpp ${ARGN}) target_compile_features(${test_target_name}_dpcpp PRIVATE cxx_std_17) target_compile_options(${test_target_name}_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS}) + target_link_options(${test_target_name}_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS}) target_link_options(${test_target_name}_dpcpp PRIVATE -fsycl-device-lib=all -fsycl-device-code-split=per_kernel) endif() if(GINKGO_BUILD_OMP) diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 4099bb603a3..9d0952480be 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -88,8 +88,10 @@ configure_file(preconditioner/jacobi_common.hpp.in preconditioner/jacobi_common. 
ginkgo_compile_features(ginkgo_dpcpp) target_compile_definitions(ginkgo_dpcpp PRIVATE GKO_COMPILING_DPCPP _ONEDPL_COMPILE_KERNEL=0) +set(GINKGO_DPCPP_FLAGS "-fsycl") set(GINKGO_DPCPP_FLAGS ${GINKGO_DPCPP_FLAGS} PARENT_SCOPE) target_compile_options(ginkgo_dpcpp PRIVATE "${GINKGO_DPCPP_FLAGS}") +target_link_options(ginkgo_dpcpp PRIVATE "${GINKGO_DPCPP_FLAGS}") target_compile_options(ginkgo_dpcpp PRIVATE "${GINKGO_COMPILER_FLAGS}") # Note: add MKL as PRIVATE not PUBLIC (MKL example shows) to avoid propagating # find_package(MKL) everywhere when linking ginkgo (see the MKL example diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt index 4cec6b05d22..3231956beb7 100644 --- a/test/solver/CMakeLists.txt +++ b/test/solver/CMakeLists.txt @@ -13,3 +13,6 @@ ginkgo_create_common_test(lower_trs_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(multigrid_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(solver DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(upper_trs_kernels DISABLE_EXECUTORS dpcpp) +if(GINKGO_BUILD_DPCPP) + target_link_options(test_solver_idr_kernels_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS}) +endif() diff --git a/third_party/gtest/CMakeLists.txt b/third_party/gtest/CMakeLists.txt index 45b564dbfbf..378a7cdc705 100644 --- a/third_party/gtest/CMakeLists.txt +++ b/third_party/gtest/CMakeLists.txt @@ -22,7 +22,7 @@ set_target_properties(gtest gtest_main PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${GINKGO_LIBRARY_PATH}") # If CXX compiler is dpcpp, use -ffp-model=precise # Otherwise, it will throw src/gtest.cc:1583:8: error: comparison with NaN always evaluates to false in fast floating point modes -if(CMAKE_CXX_COMPILER MATCHES "dpcpp") +if(CMAKE_CXX_COMPILER MATCHES "dpcpp|icpx") target_compile_options(gtest PRIVATE "-ffp-model=precise") target_compile_options(gtest_main PRIVATE "-ffp-model=precise") endif() From 27d7512c9f2aca5e67ae5c5771c5bd0b99578f02 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. 
Tsai" Date: Mon, 21 Aug 2023 09:41:42 +0200 Subject: [PATCH 375/583] add gko_add_sycl_to_target --- .github/workflows/intel.yml | 1 - .gitlab-ci.yml | 17 ++++++++++++++++- CMakeLists.txt | 5 +++-- benchmark/CMakeLists.txt | 2 +- cmake/build_helpers.cmake | 4 ---- cmake/create_test.cmake | 6 ++++-- cmake/sycl.cmake | 33 +++++++++++++++++++++++++++++++++ dpcpp/CMakeLists.txt | 6 ++++-- test/solver/CMakeLists.txt | 2 +- 9 files changed, 62 insertions(+), 14 deletions(-) create mode 100644 cmake/sycl.cmake diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 4652b3996e1..db18b510e21 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -6,7 +6,6 @@ on: - 'master' - 'develop' - 'release/**' - - 'icpx_compilation' tags: - '**' pull_request: diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6185608864f..e1f1eb8be3d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -581,7 +581,7 @@ build/nocuda-nomixed/nompi/clang/omp/debug/static: BUILD_SHARED_LIBS: "OFF" MIXED_PRECISION: "OFF" -build/dpcpp/2022-1/cpu/release/static: +build/dpcpp/2022-1/cpu/release/shared: extends: - .build_and_test_template - .default_variables @@ -665,6 +665,21 @@ build/dpcpp/level_zero_dgpu/release/shared: DPCPP_SINGLE_MODE: "ON" ONEAPI_DEVICE_SELECTOR: "level_zero:gpu" +build/icpx/level_zero_dgpu/release/shared: + extends: + - .build_and_test_template + - .default_variables + - .quick_test_condition + - .use_gko-oneapi-dgpu + variables: + C_COMPILER: "icx" + CXX_COMPILER: "icpx" + BUILD_DPCPP: "ON" + GKO_COMPILER_FLAGS: "-ffp-model=precise" + BUILD_TYPE: "Release" + DPCPP_SINGLE_MODE: "ON" + ONEAPI_DEVICE_SELECTOR: "level_zero:gpu" + # Job with important warnings as error warnings: stage: code_quality diff --git a/CMakeLists.txt b/CMakeLists.txt index 5dff9bcbaac..3f38c1e7165 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,9 @@ endif() if(GINKGO_BUILD_HIP) include(cmake/hip.cmake) endif() +if(GINKGO_BUILD_DPCPP) + include(cmake/sycl.cmake) +endif() if(GINKGO_BUILD_OMP) find_package(OpenMP 3.0 REQUIRED) endif() @@ -298,8 +301,6 @@ endif() if(GINKGO_BUILD_DPCPP) ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_MAJOR_VERSION __LIBSYCL_MAJOR_VERSION) ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_VERSION __SYCL_COMPILER_VERSION) - get_filename_component(GINKGO_SYCL_DIR ${CMAKE_CXX_COMPILER} DIRECTORY) - set(SYCL_INCLUDE_PATH "${GINKGO_SYCL_DIR}/../include;${GINKGO_SYCL_DIR}/../include/sycl") else() set(GINKGO_DPCPP_MAJOR_VERSION "0") endif() diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 7a4f5b1ca43..5cffddd51aa 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -141,7 +141,7 @@ if (GINKGO_BUILD_DPCPP) ginkgo_benchmark_onemkl_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) add_library(dpcpp_timer utils/dpcpp_timer.dp.cpp) target_compile_options(dpcpp_timer PRIVATE ${GINKGO_DPCPP_FLAGS}) - target_link_options(dpcpp_timer PRIVATE ${GINKGO_DPCPP_FLAGS}) + gko_add_sycl_to_target(TARGET dpcpp_timer SOURCES utils/dpcpp_timer.dp.cpp) target_link_libraries(dpcpp_timer ginkgo) endif() diff --git a/cmake/build_helpers.cmake b/cmake/build_helpers.cmake index 25add05c60f..34189a09450 100644 --- a/cmake/build_helpers.cmake +++ b/cmake/build_helpers.cmake @@ -9,10 +9,6 @@ function(ginkgo_default_includes name) $ $ ) - if(DEFINED SYCL_INCLUDE_PATH) - # avoid -fsycl in all place - target_include_directories("${name}" PUBLIC ${SYCL_INCLUDE_PATH}) - endif() if(GINKGO_HAVE_HWLOC) 
target_include_directories("${name}" PUBLIC diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 55b70bbaeaa..3794a8026e1 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -124,7 +124,7 @@ function(ginkgo_create_dpcpp_test test_name) add_executable(${test_target_name} ${test_name}.dp.cpp) target_compile_features(${test_target_name} PUBLIC cxx_std_17) target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS}) - target_link_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS}) + gko_add_sycl_to_target(TARGET ${test_target_name} SOURCES ${test_name}.dp.cpp) target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) ginkgo_set_test_target_properties(${test_target_name} "_dpcpp" ${ARGN}) ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE sycl) @@ -299,7 +299,9 @@ function(ginkgo_create_common_device_test test_name) ginkgo_create_common_test_internal(${test_name} DpcppExecutor dpcpp ${ARGN}) target_compile_features(${test_target_name}_dpcpp PRIVATE cxx_std_17) target_compile_options(${test_target_name}_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS}) - target_link_options(${test_target_name}_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS}) + # We need to use a new file to avoid sycl setting in other backends because add_sycl_to_target will change the source property. + configure_file(${test_name}.cpp ${test_name}.dp.cpp COPYONLY) + gko_add_sycl_to_target(TARGET ${test_target_name}_dpcpp SOURCES ${test_name}.dp.cpp) target_link_options(${test_target_name}_dpcpp PRIVATE -fsycl-device-lib=all -fsycl-device-code-split=per_kernel) endif() if(GINKGO_BUILD_OMP) diff --git a/cmake/sycl.cmake b/cmake/sycl.cmake new file mode 100644 index 00000000000..b0f4eab91f1 --- /dev/null +++ b/cmake/sycl.cmake @@ -0,0 +1,33 @@ +# IntelSYCL for dpcpp and icpx if the config is existed and cmake reaches the requirement +if(CMAKE_CXX_COMPILER MATCHES "dpcpp|icpx") + if(CMAKE_HOST_WIN32 AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.25) + find_package(IntelSYCL QUIET) + elseif(CMAKE_VERSION VERSION_GREATER_EQUAL 3.20.5) + find_package(IntelSYCL QUIET) + endif() +endif() +# If we do not have the config from compiler, try to set components to make it work. +if(NOT COMMAND add_sycl_to_target) + if(NOT DEFINED SYCL_FLAGS) + set(SYCL_FLAGS "-fsycl" CACHE STRING "SYCL flags for compiler") + endif() +endif() + +# Provide a uniform way for those package without add_sycl_to_target +function(gko_add_sycl_to_target) + if(COMMAND add_sycl_to_target) + add_sycl_to_target(${ARGN}) + return() + endif() + # We handle them by adding SYCL_FLAGS to compile and link to the target + set(one_value_args TARGET) + set(multi_value_args SOURCES) + cmake_parse_arguments(SYCL + "" + "${one_value_args}" + "${multi_value_args}" + ${ARGN}) + target_compile_options(${SYCL_TARGET} PRIVATE "${SYCL_FLAGS}") + target_link_options(${SYCL_TARGET} PRIVATE "${SYCL_FLAGS}") +endfunction() + diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 9d0952480be..0041b7cbd18 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -88,10 +88,12 @@ configure_file(preconditioner/jacobi_common.hpp.in preconditioner/jacobi_common. 
ginkgo_compile_features(ginkgo_dpcpp) target_compile_definitions(ginkgo_dpcpp PRIVATE GKO_COMPILING_DPCPP _ONEDPL_COMPILE_KERNEL=0) -set(GINKGO_DPCPP_FLAGS "-fsycl") set(GINKGO_DPCPP_FLAGS ${GINKGO_DPCPP_FLAGS} PARENT_SCOPE) target_compile_options(ginkgo_dpcpp PRIVATE "${GINKGO_DPCPP_FLAGS}") -target_link_options(ginkgo_dpcpp PRIVATE "${GINKGO_DPCPP_FLAGS}") +# all file in target ginkgo_dpcpp are necessarily compiled with sycl, so we can ignore the warning. +# If we would like to use SOURCES, please use the new files copied from GKO_UNIFIED_COMMON_SOURCES. +# Otherwise, the source's properties will be changed by add_sycl_to_target +gko_add_sycl_to_target(TARGET ginkgo_dpcpp) target_compile_options(ginkgo_dpcpp PRIVATE "${GINKGO_COMPILER_FLAGS}") # Note: add MKL as PRIVATE not PUBLIC (MKL example shows) to avoid propagating # find_package(MKL) everywhere when linking ginkgo (see the MKL example diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt index 3231956beb7..f870ecfbf19 100644 --- a/test/solver/CMakeLists.txt +++ b/test/solver/CMakeLists.txt @@ -14,5 +14,5 @@ ginkgo_create_common_test(multigrid_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(solver DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(upper_trs_kernels DISABLE_EXECUTORS dpcpp) if(GINKGO_BUILD_DPCPP) - target_link_options(test_solver_idr_kernels_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS}) + gko_add_sycl_to_target(TARGET test_solver_idr_kernels_dpcpp SOURCES idr_kernels.cpp) endif() From e31912e1e02701b5812830b279112450e018ef0d Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Mon, 21 Aug 2023 22:55:52 +0200 Subject: [PATCH 376/583] rename GINKGO_BUILD_DPCPP to GINKGO_BUILD_SYCL Co-authored-by: Terry Cojean Co-authored-by: Tobias Ribizel --- .github/workflows/bot-pr-updated.yml | 2 +- .gitlab-ci.yml | 14 +++++++------- .gitlab/scripts.yml | 4 ++-- .gitlab/variables.yml | 1 + CMakeLists.txt | 16 ++++++++++------ INSTALL.md | 7 ++++--- README.md | 6 +++--- benchmark/CMakeLists.txt | 6 +++--- cmake/GinkgoConfig.cmake.in | 4 ++-- cmake/autodetect_executors.cmake | 6 +++--- cmake/create_test.cmake | 4 ++-- cmake/get_info.cmake | 4 ++-- cmake/rename.cmake | 25 +++++++++++++++++++++++++ core/device_hooks/CMakeLists.txt | 2 +- core/test/gtest/CMakeLists.txt | 2 +- doc/examples/examples.hpp.in | 2 +- test/solver/CMakeLists.txt | 2 +- 17 files changed, 69 insertions(+), 38 deletions(-) create mode 100644 cmake/rename.cmake diff --git a/.github/workflows/bot-pr-updated.yml b/.github/workflows/bot-pr-updated.yml index ae357c9db96..8554ca3b1e9 100644 --- a/.github/workflows/bot-pr-updated.yml +++ b/.github/workflows/bot-pr-updated.yml @@ -28,7 +28,7 @@ jobs: runs-on: ubuntu-latest if: github.event.pull_request.author_association == 'COLLABORATOR' || github.event.pull_request.author_association == 'MEMBER' || github.event.pull_request.author_association == 'OWNER' env: - CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=DEBUG -DGINKGO_BUILD_TESTS=OFF -DGINKGO_BUILD_EXAMPLES=OFF -DGINKGO_BUILD_BENCHMARKS=OFF -DGINKGO_BUILD_HWLOC=OFF -DGINKGO_BUILD_REFERENCE=OFF -DGINKGO_BUILD_OMP=OFF -DGINKGO_BUILD_CUDA=OFF -DGINKGO_BUILD_HIP=OFF -DGINKGO_BUILD_DPCPP=OFF + CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=DEBUG -DGINKGO_BUILD_TESTS=OFF -DGINKGO_BUILD_EXAMPLES=OFF -DGINKGO_BUILD_BENCHMARKS=OFF -DGINKGO_BUILD_HWLOC=OFF -DGINKGO_BUILD_REFERENCE=OFF -DGINKGO_BUILD_OMP=OFF -DGINKGO_BUILD_CUDA=OFF -DGINKGO_BUILD_HIP=OFF -DGINKGO_BUILD_SYCL=OFF steps: - name: Checkout the new code (shallow clone) uses: 
actions/checkout@v3 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e1f1eb8be3d..ffd037e45ff 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -590,7 +590,7 @@ build/dpcpp/2022-1/cpu/release/shared: variables: C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" - BUILD_DPCPP: "ON" + BUILD_SYCL: "ON" GKO_COMPILER_FLAGS: "-ffp-model=precise" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "ON" @@ -609,7 +609,7 @@ build/dpcpp/igpu/release/shared: variables: C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" - BUILD_DPCPP: "ON" + BUILD_SYCL: "ON" GKO_COMPILER_FLAGS: "-ffp-model=precise" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "ON" @@ -626,7 +626,7 @@ build/dpcpp/igpu/release/shared: # variables: # C_COMPILER: "gcc" # CXX_COMPILER: "dpcpp" -# BUILD_DPCPP: "ON" +# BUILD_SYCL: "ON" # GKO_COMPILER_FLAGS: "-ffp-model=precise" # BUILD_TYPE: "Debug" # BUILD_SHARED_LIBS: "ON" @@ -643,7 +643,7 @@ build/dpcpp/dgpu/release/static: variables: C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" - BUILD_DPCPP: "ON" + BUILD_SYCL: "ON" GKO_COMPILER_FLAGS: "-ffp-model=precise" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OF" @@ -659,7 +659,7 @@ build/dpcpp/level_zero_dgpu/release/shared: variables: C_COMPILER: "gcc" CXX_COMPILER: "dpcpp" - BUILD_DPCPP: "ON" + BUILD_SYCL: "ON" GKO_COMPILER_FLAGS: "-ffp-model=precise" BUILD_TYPE: "Release" DPCPP_SINGLE_MODE: "ON" @@ -674,7 +674,7 @@ build/icpx/level_zero_dgpu/release/shared: variables: C_COMPILER: "icx" CXX_COMPILER: "icpx" - BUILD_DPCPP: "ON" + BUILD_SYCL: "ON" GKO_COMPILER_FLAGS: "-ffp-model=precise" BUILD_TYPE: "Release" DPCPP_SINGLE_MODE: "ON" @@ -834,7 +834,7 @@ gh-pages: -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DBUILD_SHARED_LIBS=ON ${EXTRA_CMAKE_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=OFF -DGINKGO_BUILD_OMP=OFF -DGINKGO_BUILD_CUDA=OFF - -DGINKGO_BUILD_HIP=OFF -DGINKGO_BUILD_DPCPP=OFF -DGINKGO_BUILD_MPI=OFF + -DGINKGO_BUILD_HIP=OFF -DGINKGO_BUILD_SYCL=OFF -DGINKGO_BUILD_MPI=OFF -DGINKGO_BUILD_TESTS=OFF -DGINKGO_BUILD_EXAMPLES=OFF -DGINKGO_BUILD_DOC=ON -DGINKGO_DOC_GENERATE_PDF=ON - make usr diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index 15a2004bde6..504aa7dad40 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -40,7 +40,7 @@ -DGINKGO_COMPILER_FLAGS=${GKO_COMPILER_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} - -DGINKGO_BUILD_HIP=${BUILD_HIP} + -DGINKGO_BUILD_HIP=${BUILD_HIP} -DGINKGO_BUILD_SYCL=${BUILD_SYCL} -DGINKGO_BUILD_MPI=${BUILD_MPI} ${MPI_STR} -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} -DGINKGO_BUILD_PAPI_SDE=${BUILD_PAPI_SDE} @@ -85,7 +85,7 @@ -DGINKGO_COMPILER_FLAGS=${GKO_COMPILER_FLAGS} -DGINKGO_DEVEL_TOOLS=OFF -DGINKGO_BUILD_REFERENCE=${BUILD_REFERENCE} -DGINKGO_BUILD_OMP=${BUILD_OMP} -DGINKGO_BUILD_CUDA=${BUILD_CUDA} - -DGINKGO_BUILD_HIP=${BUILD_HIP} + -DGINKGO_BUILD_HIP=${BUILD_HIP} -DGINKGO_BUILD_SYCL=${BUILD_SYCL} -DGINKGO_BUILD_MPI=${BUILD_MPI} ${MPI_STR} -DGINKGO_BUILD_HWLOC=${BUILD_HWLOC} -DGINKGO_BUILD_PAPI_SDE=${BUILD_PAPI_SDE} diff --git a/.gitlab/variables.yml b/.gitlab/variables.yml index 2316b5abc71..6c75d60d069 100644 --- a/.gitlab/variables.yml +++ b/.gitlab/variables.yml @@ -11,6 +11,7 @@ BUILD_OMP: "OFF" BUILD_CUDA: "OFF" BUILD_HIP: "OFF" + BUILD_SYCL: "OFF" BUILD_HWLOC: "ON" BUILD_PAPI_SDE: "OFF" BUILD_MPI: "OFF" diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f38c1e7165..216feb658f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,9 @@ include(cmake/autodetect_executors.cmake) 
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules/") include(cmake/autodetect_system_libs.cmake) +# rename helper +include(cmake/rename.cmake) + # Ginkgo configuration options option(GINKGO_DEVEL_TOOLS "Add development tools to the build system" OFF) option(GINKGO_BUILD_TESTS "Generate build files for unit tests" ON) @@ -21,8 +24,9 @@ option(GINKGO_BUILD_BENCHMARKS "Build Ginkgo's benchmarks" ON) option(GINKGO_BUILD_REFERENCE "Compile reference CPU kernels" ON) option(GINKGO_BUILD_OMP "Compile OpenMP kernels for CPU" ${GINKGO_HAS_OMP}) option(GINKGO_BUILD_MPI "Compile the MPI module" ${GINKGO_HAS_MPI}) -option(GINKGO_BUILD_DPCPP - "Compile DPC++ kernels for Intel GPUs or other DPC++ enabled hardware" ${GINKGO_HAS_DPCPP}) +gko_rename_cache(GINKGO_BUILD_DPCPP GINKGO_BUILD_SYCL BOOL) +option(GINKGO_BUILD_SYCL + "Compile SYCL kernels for Intel GPUs or other SYCL enabled hardware" ${GINKGO_HAS_SYCL}) option(GINKGO_BUILD_CUDA "Compile kernels for NVIDIA GPUs" ${GINKGO_HAS_CUDA}) option(GINKGO_BUILD_HIP "Compile kernels for AMD or NVIDIA GPUs" ${GINKGO_HAS_HIP}) option(GINKGO_BUILD_DOC "Generate documentation" OFF) @@ -50,7 +54,7 @@ set(GINKGO_VERBOSE_LEVEL "1" CACHE STRING if(MSVC) set(GINKGO_COMPILER_FLAGS "" CACHE STRING "Set the required CXX compiler flags, mainly used for warnings. Current default is ``") -elseif(GINKGO_BUILD_DPCPP OR CMAKE_CXX_COMPILER MATCHES "dpcpp|icpx") +elseif(GINKGO_BUILD_SYCL OR CMAKE_CXX_COMPILER MATCHES "dpcpp|icpx") # For now always use `-ffp-model=precise` with DPC++. This can be removed when # the floating point issues are fixed. set(GINKGO_COMPILER_FLAGS "-Wpedantic;-ffp-model=precise" CACHE STRING @@ -99,7 +103,7 @@ endif() if(GINKGO_BUILD_HIP) include(cmake/hip.cmake) endif() -if(GINKGO_BUILD_DPCPP) +if(GINKGO_BUILD_SYCL) include(cmake/sycl.cmake) endif() if(GINKGO_BUILD_OMP) @@ -298,7 +302,7 @@ if(MSVC) endif() endif() -if(GINKGO_BUILD_DPCPP) +if(GINKGO_BUILD_SYCL) ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_MAJOR_VERSION __LIBSYCL_MAJOR_VERSION) ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_VERSION __SYCL_COMPILER_VERSION) else() @@ -321,7 +325,7 @@ endif() if(GINKGO_BUILD_HIP) add_subdirectory(hip) # High-performance kernels for AMD or NVIDIA GPUs endif() -if(GINKGO_BUILD_DPCPP) +if(GINKGO_BUILD_SYCL) add_subdirectory(dpcpp) # High-performance DPC++ kernels endif() if(GINKGO_BUILD_OMP) diff --git a/INSTALL.md b/INSTALL.md index b29358d4eb6..4da58010ba8 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -42,9 +42,10 @@ Ginkgo adds the following additional switches to control what is being built: * `-DGINKGO_BUILD_CUDA={ON, OFF}` builds optimized cuda versions of the kernels (requires CUDA), default is `ON` if a CUDA compiler could be detected, `OFF` otherwise. -* `-DGINKGO_BUILD_DPCPP={ON, OFF}` builds optimized DPC++ versions of the - kernels (requires `CMAKE_CXX_COMPILER` to be set to the `dpcpp` compiler). - The default is `ON` if `CMAKE_CXX_COMPILER` is a DPC++ compiler, `OFF` +* `-DGINKGO_BUILD_DPCPP={ON, OFF}` is deprecated. Please use `GINKGO_BUILD_SYCL` instead. +* `-DGINKGO_BUILD_SYCL={ON, OFF}` builds optimized SYCL versions of the + kernels (requires `CMAKE_CXX_COMPILER` to be set to the `dpcpp` or `icpx` compiler). + The default is `ON` if `CMAKE_CXX_COMPILER` is a SYCL compiler, `OFF` otherwise. 
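As a quick sketch of how the renamed option described above is meant to be used from a stand-alone build (the source/build paths, compiler choice, and job count here are assumptions, following the intel.yml job earlier in this series):

    cmake -S . -B build \
          -DCMAKE_CXX_COMPILER=icpx \
          -DGINKGO_BUILD_SYCL=ON \
          -DGINKGO_COMPILER_FLAGS="-ffp-model=precise"
    cmake --build build -j8
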
* `-DGINKGO_BUILD_HIP={ON, OFF}` builds optimized HIP versions of the kernels (requires HIP), default is `ON` if an installation of HIP could be detected, diff --git a/README.md b/README.md index 102005e4a18..7f64d42599d 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Ginkgo is a high-performance linear algebra library for manycore systems, with a focus on the solution of sparse linear systems. It is implemented using modern C++ (you will need an at least C++14 compliant compiler to build it), with GPU kernels -implemented in CUDA, HIP, and DPC++. +implemented in CUDA, HIP, and DPC++(SYCL). Performance @@ -62,7 +62,7 @@ The Ginkgo HIP module has the following __additional__ requirements: * _10.1 <= CUDA < 11_ backend * if the hipFFT package is available, it is used to implement the FFT LinOps. -The Ginkgo DPC++ module has the following __additional__ requirements: +The Ginkgo DPC++(SYCL) module has the following __additional__ requirements: * _OneAPI 2021.3+_ * Set `dpcpp` or `icpx` as the `CMAKE_CXX_COMPILER` @@ -123,7 +123,7 @@ cmake -G "Unix Makefiles" .. && make By default, `GINKGO_BUILD_REFERENCE` is enabled. You should be able to run examples with this executor. By default, Ginkgo tries to enable the relevant modules depending on your machine environment (present of CUDA, ...). You can -also explicitly compile with the OpenMP, CUDA, HIP or DPC++ modules enabled to +also explicitly compile with the OpenMP, CUDA, HIP or DPC++(SYCL) modules enabled to run the examples with these executors. Please refer to the [Installation page](./INSTALL.md) for more details. diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 5cffddd51aa..347ecec7699 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -67,7 +67,7 @@ function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def ty target_compile_definitions("${name}" PRIVATE HAS_HIP_TIMER=1) target_link_libraries("${name}" hip_timer) endif() - if (GINKGO_BUILD_DPCPP) + if (GINKGO_BUILD_SYCL) target_compile_definitions("${name}" PRIVATE HAS_DPCPP_TIMER=1) target_link_libraries("${name}" dpcpp_timer) endif() @@ -87,7 +87,7 @@ function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def ty target_compile_definitions("${name}" PRIVATE HAS_HIP=1) target_link_libraries("${name}" hipsparse_linops_${type}) endif() - if (GINKGO_BUILD_DPCPP) + if (GINKGO_BUILD_SYCL) target_compile_definitions("${name}" PRIVATE HAS_DPCPP=1) target_link_libraries("${name}" onemkl_linops_${type}) endif() @@ -134,7 +134,7 @@ if (GINKGO_BUILD_HIP) target_link_libraries(hip_timer ginkgo) endif() -if (GINKGO_BUILD_DPCPP) +if (GINKGO_BUILD_SYCL) ginkgo_benchmark_onemkl_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION) ginkgo_benchmark_onemkl_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION) ginkgo_benchmark_onemkl_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in index 352cf1dde8d..41f3b8f2879 100644 --- a/cmake/GinkgoConfig.cmake.in +++ b/cmake/GinkgoConfig.cmake.in @@ -37,7 +37,7 @@ set(GINKGO_BUILD_OMP @GINKGO_BUILD_OMP@) set(GINKGO_BUILD_CUDA @GINKGO_BUILD_CUDA@) set(GINKGO_BUILD_HIP @GINKGO_BUILD_HIP@) set(GINKGO_BUILD_MPI @GINKGO_BUILD_MPI@) -set(GINKGO_BUILD_DPCPP @GINKGO_BUILD_DPCPP@) +set(GINKGO_BUILD_SYCL @GINKGO_BUILD_SYCL@) set(GINKGO_DEVEL_TOOLS @GINKGO_DEVEL_TOOLS@) set(GINKGO_BUILD_TESTS @GINKGO_BUILD_TESTS@) @@ -184,7 +184,7 @@ if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_HIP) endif() endif() -if((NOT 
GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_DPCPP) +if((NOT GINKGO_BUILD_SHARED_LIBS) AND GINKGO_BUILD_SYCL) find_package(MKL CONFIG REQUIRED HINTS "${GINKGO_MKL_ROOT}") find_package(oneDPL REQUIRED HINTS "${GINKGO_DPL_ROOT}") endif() diff --git a/cmake/autodetect_executors.cmake b/cmake/autodetect_executors.cmake index 86949cecda9..757262f1ea1 100644 --- a/cmake/autodetect_executors.cmake +++ b/cmake/autodetect_executors.cmake @@ -1,7 +1,7 @@ set(GINKGO_HAS_OMP OFF) set(GINKGO_HAS_MPI OFF) set(GINKGO_HAS_CUDA OFF) -set(GINKGO_HAS_DPCPP OFF) +set(GINKGO_HAS_SYCL OFF) set(GINKGO_HAS_HIP OFF) include(CheckLanguage) @@ -37,7 +37,7 @@ if (NOT DEFINED GINKGO_BUILD_HIP) endif() endif() -if (NOT DEFINED GINKGO_BUILD_DPCPP) +if (NOT DEFINED GINKGO_BUILD_DPCPP AND NOT DEFINED GINKGO_BUILD_SYCL) try_compile(GKO_CAN_COMPILE_DPCPP ${PROJECT_BINARY_DIR}/dpcpp SOURCES ${PROJECT_SOURCE_DIR}/dpcpp/test_dpcpp.dp.cpp # try_compile will pass the project CMAKE_CXX_FLAGS so passing -DCMAKE_CXX_FLAGS does not affect it. @@ -47,6 +47,6 @@ if (NOT DEFINED GINKGO_BUILD_DPCPP) CXX_STANDARD 17) if (GKO_CAN_COMPILE_DPCPP) message(STATUS "Enabling DPCPP executor") - set(GINKGO_HAS_DPCPP ON) + set(GINKGO_HAS_SYCL ON) endif() endif() diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 3794a8026e1..522ad5f2ba7 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -247,7 +247,7 @@ function(ginkgo_create_common_test test_name) if(GINKGO_BUILD_CUDA) ginkgo_create_common_test_internal(${test_name} CudaExecutor cuda ${ARGN}) endif() - if(GINKGO_BUILD_DPCPP) + if(GINKGO_BUILD_SYCL) ginkgo_create_common_test_internal(${test_name} DpcppExecutor dpcpp ${ARGN}) endif() endfunction(ginkgo_create_common_test) @@ -295,7 +295,7 @@ endfunction(ginkgo_create_common_test_internal) function(ginkgo_create_common_device_test test_name) cmake_parse_arguments(PARSE_ARGV 1 common_device_test "" "${gko_test_single_args}" "${gko_test_multi_args}") ginkgo_build_test_name(${test_name} test_target_name) - if(GINKGO_BUILD_DPCPP) + if(GINKGO_BUILD_SYCL) ginkgo_create_common_test_internal(${test_name} DpcppExecutor dpcpp ${ARGN}) target_compile_features(${test_target_name}_dpcpp PRIVATE cxx_std_17) target_compile_options(${test_target_name}_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS}) diff --git a/cmake/get_info.cmake b/cmake/get_info.cmake index 2dd068abb50..6b904189151 100644 --- a/cmake/get_info.cmake +++ b/cmake/get_info.cmake @@ -127,7 +127,7 @@ foreach(log_type ${log_types}) ginkgo_print_module_footer(${${log_type}} "User configuration:") ginkgo_print_module_footer(${${log_type}} " Enabled modules:") ginkgo_print_foreach_variable(${${log_type}} - "GINKGO_BUILD_OMP;GINKGO_BUILD_MPI;GINKGO_BUILD_REFERENCE;GINKGO_BUILD_CUDA;GINKGO_BUILD_HIP;GINKGO_BUILD_DPCPP") + "GINKGO_BUILD_OMP;GINKGO_BUILD_MPI;GINKGO_BUILD_REFERENCE;GINKGO_BUILD_CUDA;GINKGO_BUILD_HIP;GINKGO_BUILD_SYCL") ginkgo_print_module_footer(${${log_type}} " Enabled features:") ginkgo_print_foreach_variable(${${log_type}} "GINKGO_MIXED_PRECISION;GINKGO_HAVE_GPU_AWARE_MPI") @@ -167,7 +167,7 @@ IF(GINKGO_BUILD_HIP) include(hip/get_info.cmake) ENDIF() -IF(GINKGO_BUILD_DPCPP) +IF(GINKGO_BUILD_SYCL) include(dpcpp/get_info.cmake) ENDIF() diff --git a/cmake/rename.cmake b/cmake/rename.cmake new file mode 100644 index 00000000000..d9837b84a1b --- /dev/null +++ b/cmake/rename.cmake @@ -0,0 +1,25 @@ +# Only for CACHE variable (option) +macro(gko_rename_cache deprecated actual type) + if(DEFINED ${deprecated}) + if(DEFINED ${actual}) + message("actual ${actual} and deprecated 
${deprecated}") + if("${${actual}}" STREQUAL "${${deprecated}}") + # They are the same, so only throw warning + message(WARNING "${deprecated} was deprecated, please only use ${actual} instead.") + else() + # They are different + set(${deprecated}_copy ${${deprecated}}) + unset(${deprecated} CACHE) + message(FATAL_ERROR "Both ${deprecated} and ${actual} were specified, please use ${actual} instead. " + "We remove ${deprecated}:${${deprecated}_copy} and keep ${actual}:${${actual}}") + endif() + else() + # Only set `deprecated`, move it to `actual`. + message(WARNING "${deprecated} was deprecated, please use ${actual} instead. " + "We copy ${${deprecated}} to ${actual} and unset ${deprecated}.") + set(${actual} ${${deprecated}} CACHE ${type} "") + endif() + # We always unset the deprecated for easier next setup + unset(${deprecated} CACHE) + endif() +endmacro() \ No newline at end of file diff --git a/core/device_hooks/CMakeLists.txt b/core/device_hooks/CMakeLists.txt index 901acef7797..573f87fad93 100644 --- a/core/device_hooks/CMakeLists.txt +++ b/core/device_hooks/CMakeLists.txt @@ -8,7 +8,7 @@ if(NOT GINKGO_BUILD_CUDA) ginkgo_install_library(ginkgo_cuda) endif() -if (NOT GINKGO_BUILD_DPCPP) +if (NOT GINKGO_BUILD_SYCL) add_library(ginkgo_dpcpp $ dpcpp_hooks.cpp) diff --git a/core/test/gtest/CMakeLists.txt b/core/test/gtest/CMakeLists.txt index 6d77b663e84..cdfc67fafdf 100644 --- a/core/test/gtest/CMakeLists.txt +++ b/core/test/gtest/CMakeLists.txt @@ -27,6 +27,6 @@ endif() if (GINKGO_BUILD_HIP) add_gtest_main("_hip" "GKO_COMPILING_HIP") endif() -if (GINKGO_BUILD_DPCPP) +if (GINKGO_BUILD_SYCL) add_gtest_main("_dpcpp" "GKO_COMPILING_DPCPP") endif() diff --git a/doc/examples/examples.hpp.in b/doc/examples/examples.hpp.in index a75ac59f186..7234a3ca8aa 100644 --- a/doc/examples/examples.hpp.in +++ b/doc/examples/examples.hpp.in @@ -64,7 +64,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
    *
  1. -DGINKGO_BUILD_CUDA=ON option for NVIDIA GPUs. *
  2. -DGINKGO_BUILD_HIP=ON option for AMD or NVIDIA GPUs. - *
  3. -DGINKGO_BUILD_DPCPP=ON option for Intel GPUs (and + *
  4. -DGINKGO_BUILD_SYCL=ON option for Intel GPUs (and * possibly any other platform). *
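A brief sketch of what the gko_rename_cache helper introduced above implies for existing build scripts (the option values here are only examples): passing the deprecated flag alone still configures, with a warning, while passing both flags with conflicting values stops configuration.

    # deprecated flag alone: warns and copies the value to GINKGO_BUILD_SYCL
    cmake -S . -B build -DGINKGO_BUILD_DPCPP=ON
    # conflicting values for old and new flag: configuration fails with FATAL_ERROR
    cmake -S . -B build -DGINKGO_BUILD_DPCPP=ON -DGINKGO_BUILD_SYCL=OFF
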
* diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt index f870ecfbf19..3cfe2db8ac3 100644 --- a/test/solver/CMakeLists.txt +++ b/test/solver/CMakeLists.txt @@ -13,6 +13,6 @@ ginkgo_create_common_test(lower_trs_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(multigrid_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(solver DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(upper_trs_kernels DISABLE_EXECUTORS dpcpp) -if(GINKGO_BUILD_DPCPP) +if(GINKGO_BUILD_SYCL) gko_add_sycl_to_target(TARGET test_solver_idr_kernels_dpcpp SOURCES idr_kernels.cpp) endif() From b08c49048920c2a83c9b4465bf3a55843ae515b0 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Fri, 13 Oct 2023 10:09:53 +0200 Subject: [PATCH 377/583] do not delete deprecated var from CMake, keep doc Co-authored-by: Tobias Ribizel --- CMakeLists.txt | 2 +- cmake/rename.cmake | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 216feb658f5..9eca64fa3fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,7 @@ option(GINKGO_BUILD_BENCHMARKS "Build Ginkgo's benchmarks" ON) option(GINKGO_BUILD_REFERENCE "Compile reference CPU kernels" ON) option(GINKGO_BUILD_OMP "Compile OpenMP kernels for CPU" ${GINKGO_HAS_OMP}) option(GINKGO_BUILD_MPI "Compile the MPI module" ${GINKGO_HAS_MPI}) -gko_rename_cache(GINKGO_BUILD_DPCPP GINKGO_BUILD_SYCL BOOL) +gko_rename_cache(GINKGO_BUILD_DPCPP GINKGO_BUILD_SYCL BOOL "Compile SYCL kernels for Intel GPUs or other SYCL enabled hardware") option(GINKGO_BUILD_SYCL "Compile SYCL kernels for Intel GPUs or other SYCL enabled hardware" ${GINKGO_HAS_SYCL}) option(GINKGO_BUILD_CUDA "Compile kernels for NVIDIA GPUs" ${GINKGO_HAS_CUDA}) diff --git a/cmake/rename.cmake b/cmake/rename.cmake index d9837b84a1b..6c386bc24c6 100644 --- a/cmake/rename.cmake +++ b/cmake/rename.cmake @@ -1,5 +1,5 @@ # Only for CACHE variable (option) -macro(gko_rename_cache deprecated actual type) +macro(gko_rename_cache deprecated actual type doc_string) if(DEFINED ${deprecated}) if(DEFINED ${actual}) message("actual ${actual} and deprecated ${deprecated}") @@ -8,18 +8,13 @@ macro(gko_rename_cache deprecated actual type) message(WARNING "${deprecated} was deprecated, please only use ${actual} instead.") else() # They are different - set(${deprecated}_copy ${${deprecated}}) - unset(${deprecated} CACHE) - message(FATAL_ERROR "Both ${deprecated} and ${actual} were specified, please use ${actual} instead. " - "We remove ${deprecated}:${${deprecated}_copy} and keep ${actual}:${${actual}}") + message(FATAL_ERROR "Both ${deprecated} and ${actual} were specified differently, please only use ${actual} instead.") endif() else() # Only set `deprecated`, move it to `actual`. message(WARNING "${deprecated} was deprecated, please use ${actual} instead. " - "We copy ${${deprecated}} to ${actual} and unset ${deprecated}.") - set(${actual} ${${deprecated}} CACHE ${type} "") + "We copy ${${deprecated}} to ${actual}") + set(${actual} ${${deprecated}} CACHE ${type} "${doc_string}") endif() - # We always unset the deprecated for easier next setup - unset(${deprecated} CACHE) endif() endmacro() \ No newline at end of file From dfb9607836e37d1969fe4cac8e4cf026b60a00d7 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. 
Tsai" Date: Fri, 13 Oct 2023 10:47:20 +0200 Subject: [PATCH 378/583] adapt MKL and oneDPL env --- dpcpp/CMakeLists.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 0041b7cbd18..9990496c98f 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -1,7 +1,8 @@ -find_package(MKL CONFIG REQUIRED HINTS "$ENV{MKLROOT}") -set(GINKGO_MKL_ROOT "${MKL_ROOT}" PARENT_SCOPE) -find_package(oneDPL REQUIRED HINTS "$ENV{DPL_ROOT}") -set(GINKGO_DPL_ROOT "${DPL_ROOT}" PARENT_SCOPE) +find_package(MKL CONFIG REQUIRED HINTS "$ENV{MKLROOT}" "$ENV{MKL_ROOT}") +find_package(oneDPL REQUIRED HINTS "$ENV{DPL_ROOT}" "$ENV{DPLROOT}") +# use the parameter from cmake +set(GINKGO_MKL_ROOT "${MKL_DIR}" PARENT_SCOPE) +set(GINKGO_DPL_ROOT "${oneDPL_DIR}" PARENT_SCOPE) include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) add_instantiation_files(${PROJECT_SOURCE_DIR}/common/unified matrix/dense_kernels.instantiate.cpp DENSE_INSTANTIATE) From 392a626fe68672ea6c32a5427fb5314681b0d760 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 15 Oct 2023 19:49:54 +0200 Subject: [PATCH 379/583] add factorization unpack functions --- core/factorization/factorization.cpp | 74 ++++++++++++++++++- .../test/factorization/factorization.cpp | 45 +++++++++++ 2 files changed, 117 insertions(+), 2 deletions(-) diff --git a/core/factorization/factorization.cpp b/core/factorization/factorization.cpp index d38d18ca3e5..436359a417a 100644 --- a/core/factorization/factorization.cpp +++ b/core/factorization/factorization.cpp @@ -33,18 +33,88 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include +#include "core/factorization/factorization_kernels.hpp" + + namespace gko { namespace experimental { namespace factorization { +namespace { + + +GKO_REGISTER_OPERATION(initialize_row_ptrs_l_u, + factorization::initialize_row_ptrs_l_u); +GKO_REGISTER_OPERATION(initialize_l_u, factorization::initialize_l_u); +GKO_REGISTER_OPERATION(initialize_row_ptrs_l, + factorization::initialize_row_ptrs_l); +GKO_REGISTER_OPERATION(initialize_l, factorization::initialize_l); + + +} // namespace template std::unique_ptr> -Factorization::unpack() const GKO_NOT_IMPLEMENTED; +Factorization::unpack() const +{ + const auto exec = this->get_executor(); + const auto size = this->get_size(); + switch (this->get_storage_type()) { + case storage_type::empty: + GKO_NOT_SUPPORTED(nullptr); + case storage_type::composition: + case storage_type::symm_composition: + return this->clone(); + case storage_type::combined_lu: { + // count nonzeros + array l_row_ptrs{exec, size[0] + 1}; + array u_row_ptrs{exec, size[0] + 1}; + const auto mtx = this->get_combined(); + exec->run(make_initialize_row_ptrs_l_u(mtx.get(), l_row_ptrs.get_data(), + u_row_ptrs.get_data())); + const auto l_nnz = static_cast( + exec->copy_val_to_host(l_row_ptrs.get_const_data() + size[0])); + const auto u_nnz = static_cast( + exec->copy_val_to_host(u_row_ptrs.get_const_data() + size[0])); + // create matrices + auto l_mtx = matrix_type::create( + exec, size, array{exec, l_nnz}, + array{exec, l_nnz}, std::move(l_row_ptrs)); + auto u_mtx = matrix_type::create( + exec, size, array{exec, u_nnz}, + array{exec, u_nnz}, std::move(u_row_ptrs)); + // fill matrices + exec->run(make_initialize_l_u(mtx.get(), l_mtx.get(), u_mtx.get())); + return create_from_composition( + composition_type::create(std::move(l_mtx), std::move(u_mtx))); + } + case storage_type::symm_combined_cholesky: 
{ + // count nonzeros + array l_row_ptrs{exec, size[0] + 1}; + const auto mtx = this->get_combined(); + exec->run(make_initialize_row_ptrs_l(mtx.get(), l_row_ptrs.get_data())); + const auto l_nnz = static_cast( + exec->copy_val_to_host(l_row_ptrs.get_const_data() + size[0])); + // create matrices + auto l_mtx = matrix_type::create( + exec, size, array{exec, l_nnz}, + array{exec, l_nnz}, std::move(l_row_ptrs)); + // fill matrices + exec->run(make_initialize_l(mtx.get(), l_mtx.get(), false)); + auto u_mtx = l_mtx->conj_transpose(); + return create_from_symm_composition( + composition_type::create(std::move(l_mtx), std::move(u_mtx))); + } + case storage_type::combined_ldu: + case storage_type::symm_combined_ldl: + GKO_NOT_IMPLEMENTED; + } +} template @@ -58,7 +128,7 @@ template std::shared_ptr> Factorization::get_lower_factor() const { - switch (storage_type_) { + switch (this->get_storage_type()) { case storage_type::composition: case storage_type::symm_composition: GKO_ASSERT(factors_->get_operators().size() == 2 || diff --git a/reference/test/factorization/factorization.cpp b/reference/test/factorization/factorization.cpp index d9928491771..6abfd470385 100644 --- a/reference/test/factorization/factorization.cpp +++ b/reference/test/factorization/factorization.cpp @@ -71,9 +71,13 @@ class Factorization : public ::testing::Test { : ref(gko::ReferenceExecutor::create()), lower_mtx{gko::initialize( {{1.0, 0.0, 0.0}, {3.0, 1.0, 0.0}, {1.0, 2.0, 1.0}}, ref)}, + lower_cholesky_mtx{gko::initialize( + {{1.0, 0.0, 0.0}, {3.0, -1.0, 0.0}, {1.0, 2.0, 5.0}}, ref)}, diagonal{diag_type::create(ref, 3)}, upper_mtx(gko::initialize( {{1.0, 2.0, 1.0}, {0.0, 1.0, 3.0}, {0.0, 0.0, 1.0}}, ref)), + upper_nonunit_mtx(gko::initialize( + {{1.0, 2.0, 1.0}, {0.0, -1.0, 3.0}, {0.0, 0.0, 5.0}}, ref)), combined_mtx(gko::initialize( {{1.0, 2.0, 1.0}, {3.0, -1.0, 3.0}, {1.0, 2.0, 5.0}}, ref)), input(gko::initialize({1.0, 2.0, 3.0}, ref)), @@ -88,8 +92,10 @@ class Factorization : public ::testing::Test { std::shared_ptr ref; std::shared_ptr lower_mtx; + std::shared_ptr lower_cholesky_mtx; std::shared_ptr diagonal; std::shared_ptr upper_mtx; + std::shared_ptr upper_nonunit_mtx; std::shared_ptr combined_mtx; std::shared_ptr input; std::shared_ptr output; @@ -261,6 +267,45 @@ TYPED_TEST(Factorization, CreateSymmCombinedLDLWorks) } +TYPED_TEST(Factorization, UnpackCombinedLUWorks) +{ + using factorization_type = typename TestFixture::factorization_type; + auto fact = factorization_type::create_from_combined_lu( + this->combined_mtx->clone()); + + auto separated = fact->unpack(); + + ASSERT_EQ(separated->get_storage_type(), + gko::experimental::factorization::storage_type::composition); + ASSERT_EQ(separated->get_combined(), nullptr); + ASSERT_EQ(separated->get_diagonal(), nullptr); + GKO_ASSERT_MTX_NEAR(separated->get_lower_factor(), this->lower_mtx, 0.0); + GKO_ASSERT_MTX_NEAR(separated->get_upper_factor(), this->upper_nonunit_mtx, + 0.0); +} + + +TYPED_TEST(Factorization, UnpackSymmCombinedCholeskyWorks) +{ + using matrix_type = typename TestFixture::matrix_type; + using factorization_type = typename TestFixture::factorization_type; + auto fact = factorization_type::create_from_combined_cholesky( + this->combined_mtx->clone()); + + auto separated = fact->unpack(); + + ASSERT_EQ(separated->get_storage_type(), + gko::experimental::factorization::storage_type::symm_composition); + ASSERT_EQ(separated->get_combined(), nullptr); + ASSERT_EQ(separated->get_diagonal(), nullptr); + GKO_ASSERT_MTX_NEAR(separated->get_lower_factor(), 
this->lower_cholesky_mtx, + 0.0); + GKO_ASSERT_MTX_NEAR( + separated->get_upper_factor(), + gko::as(this->lower_cholesky_mtx->conj_transpose()), 0.0); +} + + TYPED_TEST(Factorization, ApplyFromCompositionWorks) { using factorization_type = typename TestFixture::factorization_type; From aee10ee7d1e7e7045057ebadaae52ee459721cb4 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 16 Oct 2023 11:06:00 +0200 Subject: [PATCH 380/583] test composition unpacking --- .../test/factorization/factorization.cpp | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/reference/test/factorization/factorization.cpp b/reference/test/factorization/factorization.cpp index 6abfd470385..2f8231f1da7 100644 --- a/reference/test/factorization/factorization.cpp +++ b/reference/test/factorization/factorization.cpp @@ -306,6 +306,48 @@ TYPED_TEST(Factorization, UnpackSymmCombinedCholeskyWorks) } +TYPED_TEST(Factorization, UnpackCompositionWorks) +{ + using factorization_type = typename TestFixture::factorization_type; + using composition_type = typename TestFixture::composition_type; + auto fact = factorization_type::create_from_composition( + composition_type::create(this->lower_mtx, this->upper_nonunit_mtx)); + + auto separated = fact->unpack(); + + ASSERT_EQ(separated->get_storage_type(), + gko::experimental::factorization::storage_type::composition); + ASSERT_EQ(separated->get_combined(), nullptr); + ASSERT_EQ(separated->get_diagonal(), nullptr); + GKO_ASSERT_MTX_NEAR(separated->get_lower_factor(), this->lower_mtx, 0.0); + GKO_ASSERT_MTX_NEAR(separated->get_upper_factor(), this->upper_nonunit_mtx, + 0.0); +} + + +TYPED_TEST(Factorization, UnpackSymmCompositionWorks) +{ + using matrix_type = typename TestFixture::matrix_type; + using factorization_type = typename TestFixture::factorization_type; + using composition_type = typename TestFixture::composition_type; + auto fact = factorization_type::create_from_symm_composition( + composition_type::create(this->lower_cholesky_mtx, + this->lower_cholesky_mtx->conj_transpose())); + + auto separated = fact->unpack(); + + ASSERT_EQ(separated->get_storage_type(), + gko::experimental::factorization::storage_type::symm_composition); + ASSERT_EQ(separated->get_combined(), nullptr); + ASSERT_EQ(separated->get_diagonal(), nullptr); + GKO_ASSERT_MTX_NEAR(separated->get_lower_factor(), this->lower_cholesky_mtx, + 0.0); + GKO_ASSERT_MTX_NEAR( + separated->get_upper_factor(), + gko::as(this->lower_cholesky_mtx->conj_transpose()), 0.0); +} + + TYPED_TEST(Factorization, ApplyFromCompositionWorks) { using factorization_type = typename TestFixture::factorization_type; From 671b9752cb20ca53d24fb3efbc5045dda95b1834 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 16 Oct 2023 10:42:21 +0200 Subject: [PATCH 381/583] refactor tests to use index_type template param --- reference/test/matrix/dense_kernels.cpp | 2219 +++++++---------------- 1 file changed, 630 insertions(+), 1589 deletions(-) diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 9edab89e382..3a4cfb6826b 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -97,7 +97,6 @@ class Dense : public ::testing::Test { std::unique_ptr mtx6; std::unique_ptr mtx7; std::unique_ptr mtx8; - gko::int32 invalid_index = gko::invalid_index(); std::default_random_engine rand_engine; template @@ -115,6 +114,23 @@ class Dense : public ::testing::Test { TYPED_TEST_SUITE(Dense, gko::test::ValueTypes, TypenameNameGenerator); 
+template +class DenseWithIndexType + : public Dense< + typename std::tuple_element<0, decltype(ValueIndexType())>::type> { +public: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; + + index_type invalid_index = gko::invalid_index(); +}; + +TYPED_TEST_SUITE(DenseWithIndexType, gko::test::ValueIndexTypes, + PairTypenameNameGenerator); + + TYPED_TEST(Dense, CopyRespectsStride) { using value_type = typename TestFixture::value_type; @@ -780,41 +796,9 @@ TYPED_TEST(Dense, MovesToPrecision) } -TYPED_TEST(Dense, ConvertsToCoo32) -{ - using T = typename TestFixture::value_type; - using Coo = typename gko::matrix::Coo; - auto coo_mtx = Coo::create(this->mtx4->get_executor()); - - this->mtx4->convert_to(coo_mtx); - auto v = coo_mtx->get_const_values(); - auto c = coo_mtx->get_const_col_idxs(); - auto r = coo_mtx->get_const_row_idxs(); - - ASSERT_EQ(coo_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(coo_mtx->get_num_stored_elements(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); -} - - -TYPED_TEST(Dense, MovesToCoo32) +template +void assert_coo_eq_mtx4(const gko::matrix::Coo* coo_mtx) { - using T = typename TestFixture::value_type; - using Coo = typename gko::matrix::Coo; - auto coo_mtx = Coo::create(this->mtx4->get_executor()); - - this->mtx4->move_to(coo_mtx); auto v = coo_mtx->get_const_values(); auto c = coo_mtx->get_const_col_idxs(); auto r = coo_mtx->get_const_row_idxs(); @@ -829,121 +813,47 @@ TYPED_TEST(Dense, MovesToCoo32) EXPECT_EQ(c[1], 1); EXPECT_EQ(c[2], 2); EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{3.0}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{5.0}); } -TYPED_TEST(Dense, ConvertsToCoo64) +TYPED_TEST(DenseWithIndexType, ConvertsToCoo) { - using T = typename TestFixture::value_type; - using Coo = typename gko::matrix::Coo; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Coo = typename gko::matrix::Coo; auto coo_mtx = Coo::create(this->mtx4->get_executor()); this->mtx4->convert_to(coo_mtx); - auto v = coo_mtx->get_const_values(); - auto c = coo_mtx->get_const_col_idxs(); - auto r = coo_mtx->get_const_row_idxs(); - ASSERT_EQ(coo_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(coo_mtx->get_num_stored_elements(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); + assert_coo_eq_mtx4(coo_mtx.get()); } -TYPED_TEST(Dense, MovesToCoo64) +TYPED_TEST(DenseWithIndexType, MovesToCoo) { - using T = typename TestFixture::value_type; - using Coo = typename gko::matrix::Coo; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Coo = typename gko::matrix::Coo; auto coo_mtx = Coo::create(this->mtx4->get_executor()); this->mtx4->move_to(coo_mtx); - auto v = coo_mtx->get_const_values(); - auto c = 
coo_mtx->get_const_col_idxs(); - auto r = coo_mtx->get_const_row_idxs(); - - ASSERT_EQ(coo_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(coo_mtx->get_num_stored_elements(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); -} - -TYPED_TEST(Dense, ConvertsToCsr32) -{ - using T = typename TestFixture::value_type; - using Csr = typename gko::matrix::Csr; - auto csr_s_classical = std::make_shared(); - auto csr_s_merge = std::make_shared(); - auto csr_mtx_c = Csr::create(this->mtx4->get_executor(), csr_s_classical); - auto csr_mtx_m = Csr::create(this->mtx4->get_executor(), csr_s_merge); - - this->mtx4->convert_to(csr_mtx_c); - this->mtx4->convert_to(csr_mtx_m); - - auto v = csr_mtx_c->get_const_values(); - auto c = csr_mtx_c->get_const_col_idxs(); - auto r = csr_mtx_c->get_const_row_ptrs(); - ASSERT_EQ(csr_mtx_c->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(csr_mtx_c->get_num_stored_elements(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 3); - EXPECT_EQ(r[2], 4); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); - ASSERT_EQ(csr_mtx_c->get_strategy()->get_name(), "classical"); - GKO_ASSERT_MTX_NEAR(csr_mtx_c, csr_mtx_m, 0.0); - ASSERT_EQ(csr_mtx_m->get_strategy()->get_name(), "merge_path"); + assert_coo_eq_mtx4(coo_mtx.get()); } -TYPED_TEST(Dense, MovesToCsr32) +template +void assert_csr_eq_mtx4(const gko::matrix::Csr* csr_mtx) { - using T = typename TestFixture::value_type; - using Csr = typename gko::matrix::Csr; - auto csr_s_classical = std::make_shared(); - auto csr_s_merge = std::make_shared(); - auto csr_mtx_c = Csr::create(this->mtx4->get_executor(), csr_s_classical); - auto csr_mtx_m = Csr::create(this->mtx4->get_executor(), csr_s_merge); - auto mtx_clone = this->mtx4->clone(); - - this->mtx4->move_to(csr_mtx_c); - mtx_clone->move_to(csr_mtx_m); - - auto v = csr_mtx_c->get_const_values(); - auto c = csr_mtx_c->get_const_col_idxs(); - auto r = csr_mtx_c->get_const_row_ptrs(); - ASSERT_EQ(csr_mtx_c->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(csr_mtx_c->get_num_stored_elements(), 4); + auto v = csr_mtx->get_const_values(); + auto c = csr_mtx->get_const_col_idxs(); + auto r = csr_mtx->get_const_row_ptrs(); + ASSERT_EQ(csr_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(csr_mtx->get_num_stored_elements(), 4); EXPECT_EQ(r[0], 0); EXPECT_EQ(r[1], 3); EXPECT_EQ(r[2], 4); @@ -951,20 +861,18 @@ TYPED_TEST(Dense, MovesToCsr32) EXPECT_EQ(c[1], 1); EXPECT_EQ(c[2], 2); EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); - ASSERT_EQ(csr_mtx_c->get_strategy()->get_name(), "classical"); - GKO_ASSERT_MTX_NEAR(csr_mtx_c, csr_mtx_m, 0.0); - ASSERT_EQ(csr_mtx_m->get_strategy()->get_name(), "merge_path"); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{3.0}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{5.0}); } -TYPED_TEST(Dense, ConvertsToCsr64) +TYPED_TEST(DenseWithIndexType, ConvertsToCsr) { - using T = typename TestFixture::value_type; - using Csr = typename gko::matrix::Csr; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Csr = typename 
gko::matrix::Csr; auto csr_s_classical = std::make_shared(); auto csr_s_merge = std::make_shared(); auto csr_mtx_c = Csr::create(this->mtx4->get_executor(), csr_s_classical); @@ -973,32 +881,18 @@ TYPED_TEST(Dense, ConvertsToCsr64) this->mtx4->convert_to(csr_mtx_c); this->mtx4->convert_to(csr_mtx_m); - auto v = csr_mtx_c->get_const_values(); - auto c = csr_mtx_c->get_const_col_idxs(); - auto r = csr_mtx_c->get_const_row_ptrs(); - ASSERT_EQ(csr_mtx_c->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(csr_mtx_c->get_num_stored_elements(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 3); - EXPECT_EQ(r[2], 4); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); + assert_csr_eq_mtx4(csr_mtx_c.get()); ASSERT_EQ(csr_mtx_c->get_strategy()->get_name(), "classical"); GKO_ASSERT_MTX_NEAR(csr_mtx_c, csr_mtx_m, 0.0); ASSERT_EQ(csr_mtx_m->get_strategy()->get_name(), "merge_path"); } -TYPED_TEST(Dense, MovesToCsr64) +TYPED_TEST(DenseWithIndexType, MovesToCsr) { - using T = typename TestFixture::value_type; - using Csr = typename gko::matrix::Csr; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Csr = typename gko::matrix::Csr; auto csr_s_classical = std::make_shared(); auto csr_s_merge = std::make_shared(); auto csr_mtx_c = Csr::create(this->mtx4->get_executor(), csr_s_classical); @@ -1008,59 +902,17 @@ TYPED_TEST(Dense, MovesToCsr64) this->mtx4->move_to(csr_mtx_c); mtx_clone->move_to(csr_mtx_m); - auto v = csr_mtx_c->get_const_values(); - auto c = csr_mtx_c->get_const_col_idxs(); - auto r = csr_mtx_c->get_const_row_ptrs(); - ASSERT_EQ(csr_mtx_c->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(csr_mtx_c->get_num_stored_elements(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 3); - EXPECT_EQ(r[2], 4); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); + assert_csr_eq_mtx4(csr_mtx_c.get()); ASSERT_EQ(csr_mtx_c->get_strategy()->get_name(), "classical"); GKO_ASSERT_MTX_NEAR(csr_mtx_c, csr_mtx_m, 0.0); ASSERT_EQ(csr_mtx_m->get_strategy()->get_name(), "merge_path"); } -TYPED_TEST(Dense, ConvertsToSparsityCsr32) -{ - using T = typename TestFixture::value_type; - using SparsityCsr = typename gko::matrix::SparsityCsr; - auto sparsity_csr_mtx = SparsityCsr::create(this->mtx4->get_executor()); - - this->mtx4->convert_to(sparsity_csr_mtx); - auto v = sparsity_csr_mtx->get_const_value(); - auto c = sparsity_csr_mtx->get_const_col_idxs(); - auto r = sparsity_csr_mtx->get_const_row_ptrs(); - - ASSERT_EQ(sparsity_csr_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(sparsity_csr_mtx->get_num_nonzeros(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 3); - EXPECT_EQ(r[2], 4); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); -} - - -TYPED_TEST(Dense, MovesToSparsityCsr32) +template +void assert_sparsity_csr_eq_mtx4( + const gko::matrix::SparsityCsr* sparsity_csr_mtx) { - using T = typename TestFixture::value_type; - using SparsityCsr = typename gko::matrix::SparsityCsr; - auto sparsity_csr_mtx = SparsityCsr::create(this->mtx4->get_executor()); - - this->mtx4->move_to(sparsity_csr_mtx); auto v = sparsity_csr_mtx->get_const_value(); auto c = sparsity_csr_mtx->get_const_col_idxs(); auto r = 
sparsity_csr_mtx->get_const_row_ptrs(); @@ -1074,65 +926,41 @@ TYPED_TEST(Dense, MovesToSparsityCsr32) EXPECT_EQ(c[1], 1); EXPECT_EQ(c[2], 2); EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); + EXPECT_EQ(v[0], ValueType{1.0}); } -TYPED_TEST(Dense, ConvertsToSparsityCsr64) +TYPED_TEST(DenseWithIndexType, ConvertsToSparsityCsr) { - using T = typename TestFixture::value_type; - using SparsityCsr = typename gko::matrix::SparsityCsr; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using SparsityCsr = + typename gko::matrix::SparsityCsr; auto sparsity_csr_mtx = SparsityCsr::create(this->mtx4->get_executor()); this->mtx4->convert_to(sparsity_csr_mtx); - auto v = sparsity_csr_mtx->get_const_value(); - auto c = sparsity_csr_mtx->get_const_col_idxs(); - auto r = sparsity_csr_mtx->get_const_row_ptrs(); - ASSERT_EQ(sparsity_csr_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(sparsity_csr_mtx->get_num_nonzeros(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 3); - EXPECT_EQ(r[2], 4); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); + assert_sparsity_csr_eq_mtx4(sparsity_csr_mtx.get()); } -TYPED_TEST(Dense, MovesToSparsityCsr64) +TYPED_TEST(DenseWithIndexType, MovesToSparsityCsr) { - using T = typename TestFixture::value_type; - using SparsityCsr = typename gko::matrix::SparsityCsr; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using SparsityCsr = + typename gko::matrix::SparsityCsr; auto sparsity_csr_mtx = SparsityCsr::create(this->mtx4->get_executor()); this->mtx4->move_to(sparsity_csr_mtx); - auto v = sparsity_csr_mtx->get_const_value(); - auto c = sparsity_csr_mtx->get_const_col_idxs(); - auto r = sparsity_csr_mtx->get_const_row_ptrs(); - ASSERT_EQ(sparsity_csr_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(sparsity_csr_mtx->get_num_nonzeros(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 3); - EXPECT_EQ(r[2], 4); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); + assert_sparsity_csr_eq_mtx4(sparsity_csr_mtx.get()); } -TYPED_TEST(Dense, ConvertsToEll32) +template +void assert_ell_eq_mtx6(const gko::matrix::Ell* ell_mtx) { - using T = typename TestFixture::value_type; - using Ell = typename gko::matrix::Ell; - auto ell_mtx = Ell::create(this->mtx6->get_executor()); - - this->mtx6->convert_to(ell_mtx); auto v = ell_mtx->get_const_values(); auto c = ell_mtx->get_const_col_idxs(); @@ -1143,156 +971,99 @@ TYPED_TEST(Dense, ConvertsToEll32) EXPECT_EQ(c[0], 0); EXPECT_EQ(c[1], 1); EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{0.0}); + EXPECT_EQ(c[3], gko::invalid_index()); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{1.5}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{0.0}); } -TYPED_TEST(Dense, MovesToEll32) +TYPED_TEST(DenseWithIndexType, ConvertsToEll) { - using T = typename TestFixture::value_type; - using Ell = typename gko::matrix::Ell; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Ell = typename gko::matrix::Ell; auto ell_mtx = Ell::create(this->mtx6->get_executor()); - this->mtx6->move_to(ell_mtx); - auto v = ell_mtx->get_const_values(); - auto c = ell_mtx->get_const_col_idxs(); + this->mtx6->convert_to(ell_mtx); - 
ASSERT_EQ(ell_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(ell_mtx->get_num_stored_elements_per_row(), 2); - ASSERT_EQ(ell_mtx->get_num_stored_elements(), 4); - ASSERT_EQ(ell_mtx->get_stride(), 2); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{0.0}); + assert_ell_eq_mtx6(ell_mtx.get()); } -TYPED_TEST(Dense, ConvertsToEll64) +TYPED_TEST(DenseWithIndexType, MovesToEll) { - using T = typename TestFixture::value_type; - using Ell = typename gko::matrix::Ell; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Ell = typename gko::matrix::Ell; auto ell_mtx = Ell::create(this->mtx6->get_executor()); - this->mtx6->convert_to(ell_mtx); - auto v = ell_mtx->get_const_values(); - auto c = ell_mtx->get_const_col_idxs(); + this->mtx6->move_to(ell_mtx); - ASSERT_EQ(ell_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(ell_mtx->get_num_stored_elements_per_row(), 2); - ASSERT_EQ(ell_mtx->get_num_stored_elements(), 4); - ASSERT_EQ(ell_mtx->get_stride(), 2); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{0.0}); + assert_ell_eq_mtx6(ell_mtx.get()); } -TYPED_TEST(Dense, MovesToEll64) +template +void assert_strided_ell_eq_mtx6( + const gko::matrix::Ell* ell_mtx) { - using T = typename TestFixture::value_type; - using Ell = typename gko::matrix::Ell; - auto ell_mtx = Ell::create(this->mtx6->get_executor()); - - this->mtx6->move_to(ell_mtx); + constexpr auto invalid_index = gko::invalid_index(); auto v = ell_mtx->get_const_values(); auto c = ell_mtx->get_const_col_idxs(); ASSERT_EQ(ell_mtx->get_size(), gko::dim<2>(2, 3)); ASSERT_EQ(ell_mtx->get_num_stored_elements_per_row(), 2); - ASSERT_EQ(ell_mtx->get_num_stored_elements(), 4); - ASSERT_EQ(ell_mtx->get_stride(), 2); + ASSERT_EQ(ell_mtx->get_num_stored_elements(), 6); + ASSERT_EQ(ell_mtx->get_stride(), 3); EXPECT_EQ(c[0], 0); EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{0.0}); + EXPECT_EQ(c[2], invalid_index); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(c[4], invalid_index); + EXPECT_EQ(c[5], invalid_index); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{1.5}); + EXPECT_EQ(v[2], ValueType{0.0}); + EXPECT_EQ(v[3], ValueType{2.0}); + EXPECT_EQ(v[4], ValueType{0.0}); + EXPECT_EQ(v[5], ValueType{0.0}); } -TYPED_TEST(Dense, ConvertsToEllWithStride) +TYPED_TEST(DenseWithIndexType, ConvertsToEllWithStride) { - using T = typename TestFixture::value_type; - using Ell = typename gko::matrix::Ell; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Ell = typename gko::matrix::Ell; auto ell_mtx = Ell::create(this->mtx6->get_executor(), gko::dim<2>{2, 3}, 2, 3); this->mtx6->convert_to(ell_mtx); - auto v = ell_mtx->get_const_values(); - auto c = ell_mtx->get_const_col_idxs(); - ASSERT_EQ(ell_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(ell_mtx->get_num_stored_elements_per_row(), 2); - ASSERT_EQ(ell_mtx->get_num_stored_elements(), 6); - ASSERT_EQ(ell_mtx->get_stride(), 3); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], this->invalid_index); - EXPECT_EQ(c[3], 1); - 
EXPECT_EQ(c[4], this->invalid_index); - EXPECT_EQ(c[5], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[2], T{0.0}); - EXPECT_EQ(v[3], T{2.0}); - EXPECT_EQ(v[4], T{0.0}); - EXPECT_EQ(v[5], T{0.0}); + assert_strided_ell_eq_mtx6(ell_mtx.get()); } -TYPED_TEST(Dense, MovesToEllWithStride) +TYPED_TEST(DenseWithIndexType, MovesToEllWithStride) { - using T = typename TestFixture::value_type; - using Ell = typename gko::matrix::Ell; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Ell = typename gko::matrix::Ell; auto ell_mtx = Ell::create(this->mtx6->get_executor(), gko::dim<2>{2, 3}, 2, 3); this->mtx6->move_to(ell_mtx); - auto v = ell_mtx->get_const_values(); - auto c = ell_mtx->get_const_col_idxs(); - ASSERT_EQ(ell_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(ell_mtx->get_num_stored_elements_per_row(), 2); - ASSERT_EQ(ell_mtx->get_num_stored_elements(), 6); - ASSERT_EQ(ell_mtx->get_stride(), 3); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], this->invalid_index); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(c[4], this->invalid_index); - EXPECT_EQ(c[5], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[2], T{0.0}); - EXPECT_EQ(v[3], T{2.0}); - EXPECT_EQ(v[4], T{0.0}); - EXPECT_EQ(v[5], T{0.0}); + assert_strided_ell_eq_mtx6(ell_mtx.get()); } -TYPED_TEST(Dense, MovesToHybridAutomatically32) +template +void assert_hybrid_auto_eq_mtx4( + const gko::matrix::Hybrid* hybrid_mtx) { - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor()); - - this->mtx4->move_to(hybrid_mtx); auto v = hybrid_mtx->get_const_coo_values(); auto c = hybrid_mtx->get_const_coo_col_idxs(); auto r = hybrid_mtx->get_const_coo_row_idxs(); @@ -1312,20 +1083,43 @@ TYPED_TEST(Dense, MovesToHybridAutomatically32) EXPECT_EQ(c[1], 1); EXPECT_EQ(c[2], 2); EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{3.0}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{5.0}); } -TYPED_TEST(Dense, ConvertsToHybridAutomatically32) +TYPED_TEST(DenseWithIndexType, MovesToHybridAutomatically) { - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; + auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor()); + + this->mtx4->move_to(hybrid_mtx); + + assert_hybrid_auto_eq_mtx4(hybrid_mtx.get()); +} + + +TYPED_TEST(DenseWithIndexType, ConvertsToHybridAutomatically) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor()); this->mtx4->convert_to(hybrid_mtx); + + assert_hybrid_auto_eq_mtx4(hybrid_mtx.get()); +} + + +template +void assert_hybrid_strided_eq_mtx4( + const gko::matrix::Hybrid* hybrid_mtx) +{ auto v = hybrid_mtx->get_const_coo_values(); auto c = hybrid_mtx->get_const_coo_col_idxs(); auto r = hybrid_mtx->get_const_coo_row_idxs(); @@ -1336,7 +1130,7 @@ TYPED_TEST(Dense, ConvertsToHybridAutomatically32) ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 0); 
ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 4); EXPECT_EQ(n, 0); - EXPECT_EQ(p, 2); + EXPECT_EQ(p, 3); EXPECT_EQ(r[0], 0); EXPECT_EQ(r[1], 0); EXPECT_EQ(r[2], 0); @@ -1345,156 +1139,46 @@ TYPED_TEST(Dense, ConvertsToHybridAutomatically32) EXPECT_EQ(c[1], 1); EXPECT_EQ(c[2], 2); EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{3.0}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{5.0}); } -TYPED_TEST(Dense, MovesToHybridAutomatically64) +TYPED_TEST(DenseWithIndexType, MovesToHybridWithStrideAutomatically) { - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor()); - - this->mtx4->move_to(hybrid_mtx); - auto v = hybrid_mtx->get_const_coo_values(); - auto c = hybrid_mtx->get_const_coo_col_idxs(); - auto r = hybrid_mtx->get_const_coo_row_idxs(); - auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); - auto p = hybrid_mtx->get_ell_stride(); - - ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 0); - ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 4); - EXPECT_EQ(n, 0); - EXPECT_EQ(p, 2); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); -} - - -TYPED_TEST(Dense, ConvertsToHybridAutomatically64) -{ - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor()); - - this->mtx4->convert_to(hybrid_mtx); - auto v = hybrid_mtx->get_const_coo_values(); - auto c = hybrid_mtx->get_const_coo_col_idxs(); - auto r = hybrid_mtx->get_const_coo_row_idxs(); - auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); - auto p = hybrid_mtx->get_ell_stride(); - - ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 0); - ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 4); - EXPECT_EQ(n, 0); - EXPECT_EQ(p, 2); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); -} - - -TYPED_TEST(Dense, MovesToHybridWithStrideAutomatically) -{ - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 0, 3); this->mtx4->move_to(hybrid_mtx); - auto v = hybrid_mtx->get_const_coo_values(); - auto c = hybrid_mtx->get_const_coo_col_idxs(); - auto r = hybrid_mtx->get_const_coo_row_idxs(); - auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); - auto p = hybrid_mtx->get_ell_stride(); - ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 0); - ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 4); - EXPECT_EQ(n, 0); - EXPECT_EQ(p, 3); - EXPECT_EQ(r[0], 0); - 
EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); + assert_hybrid_strided_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(Dense, ConvertsToHybridWithStrideAutomatically) +TYPED_TEST(DenseWithIndexType, ConvertsToHybridWithStrideAutomatically) { - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 0, 3); this->mtx4->convert_to(hybrid_mtx); - auto v = hybrid_mtx->get_const_coo_values(); - auto c = hybrid_mtx->get_const_coo_col_idxs(); - auto r = hybrid_mtx->get_const_coo_row_idxs(); - auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); - auto p = hybrid_mtx->get_ell_stride(); - ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 0); - ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 4); - EXPECT_EQ(n, 0); - EXPECT_EQ(p, 3); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{3.0}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{5.0}); + assert_hybrid_strided_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(Dense, MovesToHybridWithStrideAndCooLengthByColumns2) +template +void assert_hybrid_limited_eq_mtx4( + const gko::matrix::Hybrid* hybrid_mtx) { - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = - Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 2, 3, 3, - std::make_shared(2)); - - this->mtx4->move_to(hybrid_mtx); + constexpr auto invalid_index = gko::invalid_index(); auto v = hybrid_mtx->get_const_ell_values(); auto c = hybrid_mtx->get_const_ell_col_idxs(); auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); @@ -1507,68 +1191,56 @@ TYPED_TEST(Dense, MovesToHybridWithStrideAndCooLengthByColumns2) EXPECT_EQ(p, 3); EXPECT_EQ(c[0], 0); EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], this->invalid_index); + EXPECT_EQ(c[2], invalid_index); EXPECT_EQ(c[3], 1); - EXPECT_EQ(c[4], this->invalid_index); - EXPECT_EQ(c[5], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{5.0}); - EXPECT_EQ(v[2], T{0.0}); - EXPECT_EQ(v[3], T{3.0}); - EXPECT_EQ(v[4], T{0.0}); - EXPECT_EQ(v[5], T{0.0}); - EXPECT_EQ(hybrid_mtx->get_const_coo_values()[0], T{2.0}); + EXPECT_EQ(c[4], invalid_index); + EXPECT_EQ(c[5], invalid_index); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{5.0}); + EXPECT_EQ(v[2], ValueType{0.0}); + EXPECT_EQ(v[3], ValueType{3.0}); + EXPECT_EQ(v[4], ValueType{0.0}); + EXPECT_EQ(v[5], ValueType{0.0}); + EXPECT_EQ(hybrid_mtx->get_const_coo_values()[0], ValueType{2.0}); EXPECT_EQ(hybrid_mtx->get_const_coo_row_idxs()[0], 0); EXPECT_EQ(hybrid_mtx->get_const_coo_col_idxs()[0], 2); } -TYPED_TEST(Dense, ConvertsToHybridWithStrideAndCooLengthByColumns2) +TYPED_TEST(DenseWithIndexType, MovesToHybridWithStrideAndCooLengthByColumns2) { - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; + using value_type = typename TestFixture::value_type; + using index_type = 
typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 2, 3, 3, std::make_shared(2)); - this->mtx4->convert_to(hybrid_mtx); - auto v = hybrid_mtx->get_const_ell_values(); - auto c = hybrid_mtx->get_const_ell_col_idxs(); - auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); - auto p = hybrid_mtx->get_ell_stride(); + this->mtx4->move_to(hybrid_mtx); - ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 6); - ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 1); - EXPECT_EQ(n, 2); - EXPECT_EQ(p, 3); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], this->invalid_index); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(c[4], this->invalid_index); - EXPECT_EQ(c[5], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{5.0}); - EXPECT_EQ(v[2], T{0.0}); - EXPECT_EQ(v[3], T{3.0}); - EXPECT_EQ(v[4], T{0.0}); - EXPECT_EQ(v[5], T{0.0}); - EXPECT_EQ(hybrid_mtx->get_const_coo_row_idxs()[0], 0); - EXPECT_EQ(hybrid_mtx->get_const_coo_col_idxs()[0], 2); - EXPECT_EQ(hybrid_mtx->get_const_coo_values()[0], T{2.0}); + assert_hybrid_limited_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(Dense, MovesToHybridWithStrideByPercent40) +TYPED_TEST(DenseWithIndexType, ConvertsToHybridWithStrideAndCooLengthByColumns2) { - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; auto hybrid_mtx = - Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 1, 3, - std::make_shared(0.4)); + Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 2, 3, 3, + std::make_shared(2)); - this->mtx4->move_to(hybrid_mtx); + this->mtx4->convert_to(hybrid_mtx); + + assert_hybrid_limited_eq_mtx4(hybrid_mtx.get()); +} + + +template +void assert_hybrid_percent_eq_mtx4( + const gko::matrix::Hybrid* hybrid_mtx) +{ auto v = hybrid_mtx->get_const_ell_values(); auto c = hybrid_mtx->get_const_ell_col_idxs(); auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); @@ -1583,13 +1255,13 @@ TYPED_TEST(Dense, MovesToHybridWithStrideByPercent40) EXPECT_EQ(p, 3); EXPECT_EQ(c[0], 0); EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{5.0}); - EXPECT_EQ(v[2], T{0.0}); + EXPECT_EQ(c[2], gko::invalid_index()); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{5.0}); + EXPECT_EQ(v[2], ValueType{0.0}); ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 2); - EXPECT_EQ(coo_v[0], T{3.0}); - EXPECT_EQ(coo_v[1], T{2.0}); + EXPECT_EQ(coo_v[0], ValueType{3.0}); + EXPECT_EQ(coo_v[1], ValueType{2.0}); EXPECT_EQ(coo_c[0], 1); EXPECT_EQ(coo_c[1], 2); EXPECT_EQ(coo_r[0], 0); @@ -1597,87 +1269,41 @@ TYPED_TEST(Dense, MovesToHybridWithStrideByPercent40) } -TYPED_TEST(Dense, ConvertsToHybridWithStrideByPercent40) +TYPED_TEST(DenseWithIndexType, MovesToHybridWithStrideByPercent40) { - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 1, 3, std::make_shared(0.4)); - this->mtx4->convert_to(hybrid_mtx); - auto v = hybrid_mtx->get_const_ell_values(); - 
auto c = hybrid_mtx->get_const_ell_col_idxs(); - auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); - auto p = hybrid_mtx->get_ell_stride(); - auto coo_v = hybrid_mtx->get_const_coo_values(); - auto coo_c = hybrid_mtx->get_const_coo_col_idxs(); - auto coo_r = hybrid_mtx->get_const_coo_row_idxs(); + this->mtx4->move_to(hybrid_mtx); - ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 3); - EXPECT_EQ(n, 1); - EXPECT_EQ(p, 3); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{5.0}); - EXPECT_EQ(v[2], T{0.0}); - ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 2); - EXPECT_EQ(coo_v[0], T{3.0}); - EXPECT_EQ(coo_v[1], T{2.0}); - EXPECT_EQ(coo_c[0], 1); - EXPECT_EQ(coo_c[1], 2); - EXPECT_EQ(coo_r[0], 0); - EXPECT_EQ(coo_r[1], 0); + assert_hybrid_percent_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(Dense, ConvertsToSellp32) +TYPED_TEST(DenseWithIndexType, ConvertsToHybridWithStrideByPercent40) { - using T = typename TestFixture::value_type; - using Sellp = typename gko::matrix::Sellp; - auto sellp_mtx = Sellp::create(this->mtx7->get_executor()); + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; + auto hybrid_mtx = + Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 1, 3, + std::make_shared(0.4)); - this->mtx7->convert_to(sellp_mtx); - auto v = sellp_mtx->get_const_values(); - auto c = sellp_mtx->get_const_col_idxs(); - auto s = sellp_mtx->get_const_slice_sets(); - auto l = sellp_mtx->get_const_slice_lengths(); + this->mtx4->convert_to(hybrid_mtx); - ASSERT_EQ(sellp_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(sellp_mtx->get_total_cols(), 3); - ASSERT_EQ(sellp_mtx->get_num_stored_elements(), - 3 * gko::matrix::default_slice_size); - ASSERT_EQ(sellp_mtx->get_slice_size(), gko::matrix::default_slice_size); - ASSERT_EQ(sellp_mtx->get_stride_factor(), - gko::matrix::default_stride_factor); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[gko::matrix::default_slice_size], 1); - EXPECT_EQ(c[gko::matrix::default_slice_size + 1], this->invalid_index); - EXPECT_EQ(c[2 * gko::matrix::default_slice_size], 2); - EXPECT_EQ(c[2 * gko::matrix::default_slice_size + 1], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[gko::matrix::default_slice_size], T{2.0}); - EXPECT_EQ(v[gko::matrix::default_slice_size + 1], T{0.0}); - EXPECT_EQ(v[2 * gko::matrix::default_slice_size], T{3.0}); - EXPECT_EQ(v[2 * gko::matrix::default_slice_size + 1], T{0.0}); - EXPECT_EQ(s[0], 0); - EXPECT_EQ(s[1], 3); - EXPECT_EQ(l[0], 3); + assert_hybrid_percent_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(Dense, MovesToSellp32) +template +void assert_sellp_eq_mtx7( + const gko::matrix::Sellp* sellp_mtx) { - using T = typename TestFixture::value_type; - using Sellp = typename gko::matrix::Sellp; - auto sellp_mtx = Sellp::create(this->mtx7->get_executor()); - - this->mtx7->move_to(sellp_mtx); + constexpr auto invalid_index = gko::invalid_index(); auto v = sellp_mtx->get_const_values(); auto c = sellp_mtx->get_const_col_idxs(); auto s = sellp_mtx->get_const_slice_sets(); @@ -1693,103 +1319,52 @@ TYPED_TEST(Dense, MovesToSellp32) EXPECT_EQ(c[0], 0); EXPECT_EQ(c[1], 1); EXPECT_EQ(c[gko::matrix::default_slice_size], 1); - EXPECT_EQ(c[gko::matrix::default_slice_size + 1], this->invalid_index); + EXPECT_EQ(c[gko::matrix::default_slice_size + 
1], invalid_index); EXPECT_EQ(c[2 * gko::matrix::default_slice_size], 2); - EXPECT_EQ(c[2 * gko::matrix::default_slice_size + 1], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[gko::matrix::default_slice_size], T{2.0}); - EXPECT_EQ(v[gko::matrix::default_slice_size + 1], T{0.0}); - EXPECT_EQ(v[2 * gko::matrix::default_slice_size], T{3.0}); - EXPECT_EQ(v[2 * gko::matrix::default_slice_size + 1], T{0.0}); + EXPECT_EQ(c[2 * gko::matrix::default_slice_size + 1], invalid_index); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{1.5}); + EXPECT_EQ(v[gko::matrix::default_slice_size], ValueType{2.0}); + EXPECT_EQ(v[gko::matrix::default_slice_size + 1], ValueType{0.0}); + EXPECT_EQ(v[2 * gko::matrix::default_slice_size], ValueType{3.0}); + EXPECT_EQ(v[2 * gko::matrix::default_slice_size + 1], ValueType{0.0}); EXPECT_EQ(s[0], 0); EXPECT_EQ(s[1], 3); EXPECT_EQ(l[0], 3); } -TYPED_TEST(Dense, ConvertsToSellp64) +TYPED_TEST(DenseWithIndexType, ConvertsToSellp) { - using T = typename TestFixture::value_type; - using Sellp = typename gko::matrix::Sellp; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Sellp = typename gko::matrix::Sellp; auto sellp_mtx = Sellp::create(this->mtx7->get_executor()); this->mtx7->convert_to(sellp_mtx); - auto v = sellp_mtx->get_const_values(); - auto c = sellp_mtx->get_const_col_idxs(); - auto s = sellp_mtx->get_const_slice_sets(); - auto l = sellp_mtx->get_const_slice_lengths(); - ASSERT_EQ(sellp_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(sellp_mtx->get_total_cols(), 3); - ASSERT_EQ(sellp_mtx->get_num_stored_elements(), - 3 * gko::matrix::default_slice_size); - ASSERT_EQ(sellp_mtx->get_slice_size(), gko::matrix::default_slice_size); - ASSERT_EQ(sellp_mtx->get_stride_factor(), - gko::matrix::default_stride_factor); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[gko::matrix::default_slice_size], 1); - EXPECT_EQ(c[gko::matrix::default_slice_size + 1], this->invalid_index); - EXPECT_EQ(c[2 * gko::matrix::default_slice_size], 2); - EXPECT_EQ(c[2 * gko::matrix::default_slice_size + 1], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[gko::matrix::default_slice_size], T{2.0}); - EXPECT_EQ(v[gko::matrix::default_slice_size + 1], T{0.0}); - EXPECT_EQ(v[2 * gko::matrix::default_slice_size], T{3.0}); - EXPECT_EQ(v[2 * gko::matrix::default_slice_size + 1], T{0.0}); - EXPECT_EQ(s[0], 0); - EXPECT_EQ(s[1], 3); - EXPECT_EQ(l[0], 3); + assert_sellp_eq_mtx7(sellp_mtx.get()); } -TYPED_TEST(Dense, MovesToSellp64) +TYPED_TEST(DenseWithIndexType, MovesToSellp) { - using T = typename TestFixture::value_type; - using Sellp = typename gko::matrix::Sellp; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Sellp = typename gko::matrix::Sellp; auto sellp_mtx = Sellp::create(this->mtx7->get_executor()); this->mtx7->move_to(sellp_mtx); - auto v = sellp_mtx->get_const_values(); - auto c = sellp_mtx->get_const_col_idxs(); - auto s = sellp_mtx->get_const_slice_sets(); - auto l = sellp_mtx->get_const_slice_lengths(); - ASSERT_EQ(sellp_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(sellp_mtx->get_total_cols(), 3); - ASSERT_EQ(sellp_mtx->get_num_stored_elements(), - 3 * gko::matrix::default_slice_size); - ASSERT_EQ(sellp_mtx->get_slice_size(), gko::matrix::default_slice_size); - ASSERT_EQ(sellp_mtx->get_stride_factor(), - gko::matrix::default_stride_factor); - 
EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[gko::matrix::default_slice_size], 1); - EXPECT_EQ(c[gko::matrix::default_slice_size + 1], this->invalid_index); - EXPECT_EQ(c[2 * gko::matrix::default_slice_size], 2); - EXPECT_EQ(c[2 * gko::matrix::default_slice_size + 1], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[gko::matrix::default_slice_size], T{2.0}); - EXPECT_EQ(v[gko::matrix::default_slice_size + 1], T{0.0}); - EXPECT_EQ(v[2 * gko::matrix::default_slice_size], T{3.0}); - EXPECT_EQ(v[2 * gko::matrix::default_slice_size + 1], T{0.0}); - EXPECT_EQ(s[0], 0); - EXPECT_EQ(s[1], 3); - EXPECT_EQ(l[0], 3); + assert_sellp_eq_mtx7(sellp_mtx.get()); } -TYPED_TEST(Dense, ConvertsToSellpWithSliceSizeAndStrideFactor) +template +void assert_sellp_strided_eq_mtx7( + const gko::matrix::Sellp* sellp_mtx) { - using T = typename TestFixture::value_type; - using Sellp = typename gko::matrix::Sellp; - auto sellp_mtx = - Sellp::create(this->mtx7->get_executor(), gko::dim<2>{}, 2, 2, 0); - - this->mtx7->convert_to(sellp_mtx); + constexpr auto invalid_index = gko::invalid_index(); auto v = sellp_mtx->get_const_values(); auto c = sellp_mtx->get_const_col_idxs(); auto s = sellp_mtx->get_const_slice_sets(); @@ -1803,70 +1378,59 @@ TYPED_TEST(Dense, ConvertsToSellpWithSliceSizeAndStrideFactor) EXPECT_EQ(c[0], 0); EXPECT_EQ(c[1], 1); EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], this->invalid_index); + EXPECT_EQ(c[3], invalid_index); EXPECT_EQ(c[4], 2); - EXPECT_EQ(c[5], this->invalid_index); - EXPECT_EQ(c[6], this->invalid_index); - EXPECT_EQ(c[7], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{0.0}); - EXPECT_EQ(v[4], T{3.0}); - EXPECT_EQ(v[5], T{0.0}); - EXPECT_EQ(v[6], T{0.0}); - EXPECT_EQ(v[7], T{0.0}); + EXPECT_EQ(c[5], invalid_index); + EXPECT_EQ(c[6], invalid_index); + EXPECT_EQ(c[7], invalid_index); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{1.5}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{0.0}); + EXPECT_EQ(v[4], ValueType{3.0}); + EXPECT_EQ(v[5], ValueType{0.0}); + EXPECT_EQ(v[6], ValueType{0.0}); + EXPECT_EQ(v[7], ValueType{0.0}); EXPECT_EQ(s[0], 0); EXPECT_EQ(s[1], 4); EXPECT_EQ(l[0], 4); } -TYPED_TEST(Dense, MovesToSellpWithSliceSizeAndStrideFactor) +TYPED_TEST(DenseWithIndexType, ConvertsToSellpWithSliceSizeAndStrideFactor) { - using T = typename TestFixture::value_type; - using Sellp = typename gko::matrix::Sellp; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Sellp = typename gko::matrix::Sellp; + auto sellp_mtx = + Sellp::create(this->mtx7->get_executor(), gko::dim<2>{}, 2, 2, 0); + + this->mtx7->convert_to(sellp_mtx); + + assert_sellp_strided_eq_mtx7(sellp_mtx.get()); +} + + +TYPED_TEST(DenseWithIndexType, MovesToSellpWithSliceSizeAndStrideFactor) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Sellp = typename gko::matrix::Sellp; auto sellp_mtx = Sellp::create(this->mtx7->get_executor(), gko::dim<2>{}, 2, 2, 0); this->mtx7->move_to(sellp_mtx); - auto v = sellp_mtx->get_const_values(); - auto c = sellp_mtx->get_const_col_idxs(); - auto s = sellp_mtx->get_const_slice_sets(); - auto l = sellp_mtx->get_const_slice_lengths(); - ASSERT_EQ(sellp_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(sellp_mtx->get_total_cols(), 4); - ASSERT_EQ(sellp_mtx->get_num_stored_elements(), 8); - 
ASSERT_EQ(sellp_mtx->get_slice_size(), 2); - ASSERT_EQ(sellp_mtx->get_stride_factor(), 2); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], this->invalid_index); - EXPECT_EQ(c[4], 2); - EXPECT_EQ(c[5], this->invalid_index); - EXPECT_EQ(c[6], this->invalid_index); - EXPECT_EQ(c[7], this->invalid_index); - EXPECT_EQ(v[0], T{1.0}); - EXPECT_EQ(v[1], T{1.5}); - EXPECT_EQ(v[2], T{2.0}); - EXPECT_EQ(v[3], T{0.0}); - EXPECT_EQ(v[4], T{3.0}); - EXPECT_EQ(v[5], T{0.0}); - EXPECT_EQ(v[6], T{0.0}); - EXPECT_EQ(v[7], T{0.0}); - EXPECT_EQ(s[0], 0); - EXPECT_EQ(s[1], 4); - EXPECT_EQ(l[0], 4); + assert_sellp_strided_eq_mtx7(sellp_mtx.get()); } -TYPED_TEST(Dense, ConvertsToAndFromSellpWithMoreThanOneSlice) +TYPED_TEST(DenseWithIndexType, ConvertsToAndFromSellpWithMoreThanOneSlice) { - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; using Mtx = typename TestFixture::Mtx; - using Sellp = typename gko::matrix::Sellp; + using Sellp = typename gko::matrix::Sellp; auto x = this->template gen_mtx(65, 25); auto sellp_mtx = Sellp::create(this->exec); @@ -1908,11 +1472,12 @@ TYPED_TEST(Dense, MovesEmptyToPrecision) } -TYPED_TEST(Dense, ConvertsEmptyToCoo) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyToCoo) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using Coo = typename gko::matrix::Coo; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Coo = typename gko::matrix::Coo; auto empty = Dense::create(this->exec); auto res = Coo::create(this->exec); @@ -1923,11 +1488,12 @@ TYPED_TEST(Dense, ConvertsEmptyToCoo) } -TYPED_TEST(Dense, MovesEmptyToCoo) +TYPED_TEST(DenseWithIndexType, MovesEmptyToCoo) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using Coo = typename gko::matrix::Coo; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Coo = typename gko::matrix::Coo; auto empty = Dense::create(this->exec); auto res = Coo::create(this->exec); @@ -1938,11 +1504,12 @@ TYPED_TEST(Dense, MovesEmptyToCoo) } -TYPED_TEST(Dense, ConvertsEmptyMatrixToCsr) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyMatrixToCsr) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using Csr = typename gko::matrix::Csr; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Csr = typename gko::matrix::Csr; auto empty = Dense::create(this->exec); auto res = Csr::create(this->exec); @@ -1954,11 +1521,12 @@ TYPED_TEST(Dense, ConvertsEmptyMatrixToCsr) } -TYPED_TEST(Dense, MovesEmptyMatrixToCsr) +TYPED_TEST(DenseWithIndexType, MovesEmptyMatrixToCsr) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using Csr = typename gko::matrix::Csr; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Csr = typename gko::matrix::Csr; auto empty = Dense::create(this->exec); auto res = Csr::create(this->exec); @@ -1970,11 +1538,13 @@ TYPED_TEST(Dense, MovesEmptyMatrixToCsr) } -TYPED_TEST(Dense, ConvertsEmptyToSparsityCsr) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyToSparsityCsr) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using SparsityCsr = typename 
gko::matrix::SparsityCsr; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using SparsityCsr = + typename gko::matrix::SparsityCsr; auto empty = Dense::create(this->exec); auto res = SparsityCsr::create(this->exec); @@ -1986,11 +1556,13 @@ TYPED_TEST(Dense, ConvertsEmptyToSparsityCsr) } -TYPED_TEST(Dense, MovesEmptyToSparsityCsr) +TYPED_TEST(DenseWithIndexType, MovesEmptyToSparsityCsr) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using SparsityCsr = typename gko::matrix::SparsityCsr; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using SparsityCsr = + typename gko::matrix::SparsityCsr; auto empty = Dense::create(this->exec); auto res = SparsityCsr::create(this->exec); @@ -2002,11 +1574,12 @@ TYPED_TEST(Dense, MovesEmptyToSparsityCsr) } -TYPED_TEST(Dense, ConvertsEmptyToEll) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyToEll) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using Ell = typename gko::matrix::Ell; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Ell = typename gko::matrix::Ell; auto empty = Dense::create(this->exec); auto res = Ell::create(this->exec); @@ -2017,11 +1590,12 @@ TYPED_TEST(Dense, ConvertsEmptyToEll) } -TYPED_TEST(Dense, MovesEmptyToEll) +TYPED_TEST(DenseWithIndexType, MovesEmptyToEll) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using Ell = typename gko::matrix::Ell; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Ell = typename gko::matrix::Ell; auto empty = Dense::create(this->exec); auto res = Ell::create(this->exec); @@ -2032,11 +1606,12 @@ TYPED_TEST(Dense, MovesEmptyToEll) } -TYPED_TEST(Dense, ConvertsEmptyToHybrid) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyToHybrid) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; auto empty = Dense::create(this->exec); auto res = Hybrid::create(this->exec); @@ -2047,11 +1622,12 @@ TYPED_TEST(Dense, ConvertsEmptyToHybrid) } -TYPED_TEST(Dense, MovesEmptyToHybrid) +TYPED_TEST(DenseWithIndexType, MovesEmptyToHybrid) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using Hybrid = typename gko::matrix::Hybrid; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; auto empty = Dense::create(this->exec); auto res = Hybrid::create(this->exec); @@ -2062,11 +1638,12 @@ TYPED_TEST(Dense, MovesEmptyToHybrid) } -TYPED_TEST(Dense, ConvertsEmptyToSellp) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyToSellp) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using Sellp = typename gko::matrix::Sellp; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Sellp = typename gko::matrix::Sellp; auto empty = Dense::create(this->exec); auto res = Sellp::create(this->exec); @@ -2078,11 +1655,12 @@ TYPED_TEST(Dense, ConvertsEmptyToSellp) } -TYPED_TEST(Dense, 
MovesEmptyToSellp) +TYPED_TEST(DenseWithIndexType, MovesEmptyToSellp) { using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using Sellp = typename gko::matrix::Sellp; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Sellp = typename gko::matrix::Sellp; auto empty = Dense::create(this->exec); auto res = Sellp::create(this->exec); @@ -2159,961 +1737,400 @@ TYPED_TEST(Dense, NonSquareMatrixIsTransposableIntoDense) auto trans = Mtx::create(this->exec, gko::transpose(this->mtx4->get_size())); - this->mtx4->transpose(trans); - - GKO_ASSERT_MTX_NEAR(trans, l({{1.0, 0.0}, {3.0, 5.0}, {2.0, 0.0}}), 0.0); -} - - -TYPED_TEST(Dense, NonSquareSubmatrixIsTransposableIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto trans = Mtx::create(this->exec, gko::dim<2>{2, 1}, 5); - - this->mtx4->create_submatrix({0, 1}, {0, 2})->transpose(trans); - - GKO_ASSERT_MTX_NEAR(trans, l({1.0, 3.0}), 0.0); - ASSERT_EQ(trans->get_stride(), 5); -} - - -TYPED_TEST(Dense, NonSquareMatrixIsTransposableIntoDenseFailsForWrongDimensions) -{ - using Mtx = typename TestFixture::Mtx; - - ASSERT_THROW(this->mtx4->transpose(Mtx::create(this->exec)), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixCanGatherRows) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - - auto row_collection = this->mtx5->row_gather(&permute_idxs); - - GKO_ASSERT_MTX_NEAR(row_collection, - l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), 0.0); -} - - -TYPED_TEST(Dense, SquareMatrixCanGatherRowsIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto row_collection = Mtx::create(exec, gko::dim<2>{2, 3}); - - this->mtx5->row_gather(&permute_idxs, row_collection); - - GKO_ASSERT_MTX_NEAR(row_collection, - l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), 0.0); -} - - -TYPED_TEST(Dense, SquareSubmatrixCanGatherRowsIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto row_collection = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - - this->mtx5->create_submatrix({0, 2}, {1, 3}) - ->row_gather(&permute_idxs, row_collection); - - GKO_ASSERT_MTX_NEAR(row_collection, l({{2.0, 4.5}, {-1.0, -0.5}}), 0.0); - ASSERT_EQ(row_collection->get_stride(), 4); -} - - -TYPED_TEST(Dense, NonSquareSubmatrixCanGatherRowsIntoMixedDense) -{ - using Mtx = typename TestFixture::Mtx; - using MixedMtx = typename TestFixture::MixedMtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx4->get_executor(); - gko::array gather_index{exec, {1, 0, 1}}; - auto row_collection = MixedMtx::create(exec, gko::dim<2>{3, 3}, 4); - - this->mtx4->row_gather(&gather_index, row_collection); - - GKO_ASSERT_MTX_NEAR( - row_collection, - l( - {{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}, {0.0, 5.0, 0.0}}), - 0.0); -} - - -TYPED_TEST(Dense, NonSquareSubmatrixCanAdvancedGatherRowsIntoMixedDense) -{ - using Mtx = typename TestFixture::Mtx; - using MixedMtx = typename TestFixture::MixedMtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx4->get_executor(); - gko::array gather_index{exec, {1, 0, 1}}; - auto row_collection = gko::initialize( - 
{{1.0, 0.5, -1.0}, {-1.5, 0.5, 1.0}, {2.0, -3.0, 1.0}}, exec); - auto alpha = gko::initialize({1.0}, exec); - auto beta = gko::initialize({2.0}, exec); - - this->mtx4->row_gather(alpha, &gather_index, beta, row_collection); - - GKO_ASSERT_MTX_NEAR( - row_collection, - l( - {{2.0, 6.0, -2.0}, {-2.0, 4.0, 4.0}, {4.0, -1.0, 2.0}}), - 0.0); -} - - -TYPED_TEST(Dense, SquareMatrixGatherRowsIntoDenseFailsForWrongDimensions) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - - ASSERT_THROW(this->mtx5->row_gather(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixCanGatherRows64) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - - auto row_collection = this->mtx5->row_gather(&permute_idxs); - - GKO_ASSERT_MTX_NEAR(row_collection, - l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), 0.0); -} - - -TYPED_TEST(Dense, SquareMatrixCanGatherRowsIntoDense64) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto row_collection = Mtx::create(exec, gko::dim<2>{2, 3}); - - this->mtx5->row_gather(&permute_idxs, row_collection); - - GKO_ASSERT_MTX_NEAR(row_collection, - l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), 0.0); -} - - -TYPED_TEST(Dense, SquareSubmatrixCanGatherRowsIntoDense64) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto row_collection = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - - this->mtx5->create_submatrix({0, 2}, {1, 3}) - ->row_gather(&permute_idxs, row_collection); - - GKO_ASSERT_MTX_NEAR(row_collection, l({{2.0, 4.5}, {-1.0, -0.5}}), 0.0); - ASSERT_EQ(row_collection->get_stride(), 4); -} - - -TYPED_TEST(Dense, NonSquareSubmatrixCanGatherRowsIntoMixedDense64) -{ - using Mtx = typename TestFixture::Mtx; - using MixedMtx = typename TestFixture::MixedMtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx4->get_executor(); - gko::array gather_index{exec, {1, 0, 1}}; - auto row_collection = MixedMtx::create(exec, gko::dim<2>{3, 3}, 4); - - this->mtx4->row_gather(&gather_index, row_collection); - - GKO_ASSERT_MTX_NEAR( - row_collection, - l( - {{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}, {0.0, 5.0, 0.0}}), - 0.0); -} - - -TYPED_TEST(Dense, SquareMatrixGatherRowsIntoDenseFailsForWrongDimensions64) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - - ASSERT_THROW(this->mtx5->row_gather(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixIsPermutable) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - auto ref_permuted = - gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) - ->column_permute(&permute_idxs)); - auto permuted = gko::as(this->mtx5->permute(&permute_idxs)); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); -} - - -TYPED_TEST(Dense, SquareMatrixIsPermutableIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 
2, 0}}; - auto permuted = Mtx::create(exec, this->mtx5->get_size()); - - auto ref_permuted = - gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) - ->column_permute(&permute_idxs)); - this->mtx5->permute(&permute_idxs, permuted); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); -} - - -TYPED_TEST(Dense, SquareSubmatrixIsPermutableIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); - - auto ref_permuted = - gko::as(gko::as(mtx->row_permute(&permute_idxs)) - ->column_permute(&permute_idxs)); - mtx->permute(&permute_idxs, permuted); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); - ASSERT_EQ(permuted->get_stride(), 4); -} - - -TYPED_TEST(Dense, NonSquareMatrixPermuteIntoDenseFails) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - ASSERT_THROW(this->mtx4->permute(&permute_idxs, this->mtx4->clone()), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixPermuteIntoDenseFailsForWrongPermutationSize) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; - - ASSERT_THROW(this->mtx5->permute(&permute_idxs, this->mtx5->clone()), - gko::ValueMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixPermuteIntoDenseFailsForWrongDimensions) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - ASSERT_THROW(this->mtx5->permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixIsInversePermutable) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - auto ref_permuted = gko::as( - gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) - ->inverse_column_permute(&permute_idxs)); - auto permuted = gko::as(this->mtx5->inverse_permute(&permute_idxs)); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); -} - - -TYPED_TEST(Dense, SquareMatrixIsInversePermutableIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - auto permuted = Mtx::create(exec, this->mtx5->get_size()); - - auto ref_permuted = gko::as( - gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) - ->inverse_column_permute(&permute_idxs)); - this->mtx5->inverse_permute(&permute_idxs, permuted); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); -} - - -TYPED_TEST(Dense, SquareSubmatrixIsInversePermutableIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); - - auto ref_permuted = - gko::as(gko::as(mtx->inverse_row_permute(&permute_idxs)) - ->inverse_column_permute(&permute_idxs)); - mtx->inverse_permute(&permute_idxs, permuted); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); - ASSERT_EQ(permuted->get_stride(), 4); -} - - -TYPED_TEST(Dense, NonSquareMatrixInversePermuteIntoDenseFails) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - ASSERT_THROW( - 
this->mtx4->inverse_permute(&permute_idxs, this->mtx4->clone()), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, - SquareMatrixInversePermuteIntoDenseFailsForWrongPermutationSize) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {0, 1}}; - - ASSERT_THROW( - this->mtx5->inverse_permute(&permute_idxs, this->mtx5->clone()), - gko::ValueMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixInversePermuteIntoDenseFailsForWrongDimensions) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - ASSERT_THROW(this->mtx5->inverse_permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixIsPermutable64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - auto ref_permuted = - gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) - ->column_permute(&permute_idxs)); - auto permuted = gko::as(this->mtx5->permute(&permute_idxs)); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); -} - - -TYPED_TEST(Dense, SquareMatrixIsPermutableIntoDense64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - auto permuted = Mtx::create(exec, this->mtx5->get_size()); - - auto ref_permuted = - gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) - ->column_permute(&permute_idxs)); - this->mtx5->permute(&permute_idxs, permuted); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); -} - - -TYPED_TEST(Dense, SquareSubmatrixIsPermutableIntoDense64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); - - auto ref_permuted = - gko::as(gko::as(mtx->row_permute(&permute_idxs)) - ->column_permute(&permute_idxs)); - mtx->permute(&permute_idxs, permuted); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); - ASSERT_EQ(permuted->get_stride(), 4); -} - - -TYPED_TEST(Dense, NonSquareMatrixPermuteIntoDenseFails64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - ASSERT_THROW(this->mtx4->permute(&permute_idxs, this->mtx4->clone()), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixPermuteIntoDenseFailsForWrongPermutationSize64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; - - ASSERT_THROW(this->mtx5->permute(&permute_idxs, this->mtx5->clone()), - gko::ValueMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixPermuteIntoDenseFailsForWrongDimensions64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - ASSERT_THROW(this->mtx5->permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixIsInversePermutable64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - auto ref_permuted = gko::as( - gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) - ->inverse_column_permute(&permute_idxs)); - auto permuted = gko::as(this->mtx5->inverse_permute(&permute_idxs)); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 
0.0); -} - - -TYPED_TEST(Dense, SquareMatrixIsInversePermutableIntoDense64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - auto permuted = Mtx::create(exec, this->mtx5->get_size()); - - auto ref_permuted = gko::as( - gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) - ->inverse_column_permute(&permute_idxs)); - this->mtx5->inverse_permute(&permute_idxs, permuted); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); -} - - -TYPED_TEST(Dense, SquareSubmatrixIsInversePermutableIntoDense64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); - - auto ref_permuted = - gko::as(gko::as(mtx->inverse_row_permute(&permute_idxs)) - ->inverse_column_permute(&permute_idxs)); - mtx->inverse_permute(&permute_idxs, permuted); - - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); - ASSERT_EQ(permuted->get_stride(), 4); -} - - -TYPED_TEST(Dense, NonSquareMatrixInversePermuteIntoDenseFails64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - ASSERT_THROW( - this->mtx4->inverse_permute(&permute_idxs, this->mtx4->clone()), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, - SquareMatrixInversePermuteIntoDenseFailsForWrongPermutationSize64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; - - ASSERT_THROW( - this->mtx5->inverse_permute(&permute_idxs, this->mtx5->clone()), - gko::ValueMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixInversePermuteIntoDenseFailsForWrongDimensions64) -{ - using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - ASSERT_THROW(this->mtx5->inverse_permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, SquareMatrixIsRowPermutable) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - auto row_permute = gko::as(this->mtx5->row_permute(&permute_idxs)); - - GKO_ASSERT_MTX_NEAR( - row_permute, - l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); -} - - -TYPED_TEST(Dense, NonSquareMatrixIsRowPermutable) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - - auto row_permute = gko::as(this->mtx4->row_permute(&permute_idxs)); - - GKO_ASSERT_MTX_NEAR(row_permute, l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), - 0.0); -} - - -TYPED_TEST(Dense, SquareMatrixIsRowPermutableIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); - - this->mtx5->row_permute(&permute_idxs, row_permute); - - GKO_ASSERT_MTX_NEAR( - row_permute, - l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); -} - - -TYPED_TEST(Dense, SquareSubmatrixIsRowPermutableIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array 
permute_idxs{exec, {1, 0}}; - auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - - this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->row_permute(&permute_idxs, row_permute); + this->mtx4->transpose(trans); - GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), 0.0); - ASSERT_EQ(row_permute->get_stride(), 4); + GKO_ASSERT_MTX_NEAR(trans, l({{1.0, 0.0}, {3.0, 5.0}, {2.0, 0.0}}), 0.0); } -TYPED_TEST(Dense, SquareMatrixRowPermuteIntoDenseFailsForWrongPermutationSize) +TYPED_TEST(Dense, NonSquareSubmatrixIsTransposableIntoDense) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + auto trans = Mtx::create(this->exec, gko::dim<2>{2, 1}, 5); - ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, row_permute), - gko::ValueMismatch); + this->mtx4->create_submatrix({0, 1}, {0, 2})->transpose(trans); + + GKO_ASSERT_MTX_NEAR(trans, l({1.0, 3.0}), 0.0); + ASSERT_EQ(trans->get_stride(), 5); } -TYPED_TEST(Dense, SquareMatrixRowPermuteIntoDenseFailsForWrongDimensions) +TYPED_TEST(Dense, NonSquareMatrixIsTransposableIntoDenseFailsForWrongDimensions) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, Mtx::create(exec)), + ASSERT_THROW(this->mtx4->transpose(Mtx::create(this->exec)), gko::DimensionMismatch); } -TYPED_TEST(Dense, SquareMatrixIsColPermutable) +TYPED_TEST(DenseWithIndexType, SquareMatrixCanGatherRows) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 0}}; - auto c_permute = gko::as(this->mtx5->column_permute(&permute_idxs)); + auto row_collection = this->mtx5->row_gather(&permute_idxs); - GKO_ASSERT_MTX_NEAR( - c_permute, l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), - 0.0); + GKO_ASSERT_MTX_NEAR(row_collection, + l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), + 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsColPermutable) +TYPED_TEST(DenseWithIndexType, SquareMatrixCanGatherRowsIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 0}}; + auto row_collection = Mtx::create(exec, gko::dim<2>{2, 3}); - auto c_permute = gko::as(this->mtx4->column_permute(&permute_idxs)); + this->mtx5->row_gather(&permute_idxs, row_collection); - GKO_ASSERT_MTX_NEAR(c_permute, l({{3.0, 2.0, 1.0}, {5.0, 0.0, 0.0}}), + GKO_ASSERT_MTX_NEAR(row_collection, + l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), 0.0); } -TYPED_TEST(Dense, SquareMatrixIsColPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixCanGatherRowsIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - 
gko::array permute_idxs{exec, {1, 2, 0}}; - auto c_permute = Mtx::create(exec, this->mtx5->get_size()); + gko::array permute_idxs{exec, {1, 0}}; + auto row_collection = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - this->mtx5->column_permute(&permute_idxs, c_permute); + this->mtx5->create_submatrix({0, 2}, {1, 3}) + ->row_gather(&permute_idxs, row_collection); - GKO_ASSERT_MTX_NEAR( - c_permute, l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), - 0.0); + GKO_ASSERT_MTX_NEAR(row_collection, + l({{2.0, 4.5}, {-1.0, -0.5}}), 0.0); + ASSERT_EQ(row_collection->get_stride(), 4); } -TYPED_TEST(Dense, SquareSubmatrixIsColPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, NonSquareSubmatrixCanGatherRowsIntoMixedDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + using MixedMtx = typename TestFixture::MixedMtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx4->get_executor(); + gko::array gather_index{exec, {1, 0, 1}}; + auto row_collection = MixedMtx::create(exec, gko::dim<2>{3, 3}, 4); - this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->column_permute(&permute_idxs, c_permute); + this->mtx4->row_gather(&gather_index, row_collection); - GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), 0.0); - ASSERT_EQ(c_permute->get_stride(), 4); + GKO_ASSERT_MTX_NEAR( + row_collection, + l( + {{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}, {0.0, 5.0, 0.0}}), + 0.0); } -TYPED_TEST(Dense, SquareMatrixColPermuteIntoDenseFailsForWrongPermutationSize) +TYPED_TEST(DenseWithIndexType, + NonSquareSubmatrixCanAdvancedGatherRowsIntoMixedDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + using MixedMtx = typename TestFixture::MixedMtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx4->get_executor(); + gko::array gather_index{exec, {1, 0, 1}}; + auto row_collection = gko::initialize( + {{1.0, 0.5, -1.0}, {-1.5, 0.5, 1.0}, {2.0, -3.0, 1.0}}, exec); + auto alpha = gko::initialize({1.0}, exec); + auto beta = gko::initialize({2.0}, exec); - ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, row_permute), - gko::ValueMismatch); + this->mtx4->row_gather(alpha, &gather_index, beta, row_collection); + + GKO_ASSERT_MTX_NEAR( + row_collection, + l( + {{2.0, 6.0, -2.0}, {-2.0, 4.0, 4.0}, {4.0, -1.0, 2.0}}), + 0.0); } -TYPED_TEST(Dense, SquareMatrixColPermuteIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, + SquareMatrixGatherRowsIntoDenseFailsForWrongDimensions) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 0}}; - ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, Mtx::create(exec)), + ASSERT_THROW(this->mtx5->row_gather(&permute_idxs, Mtx::create(exec)), gko::DimensionMismatch); } -TYPED_TEST(Dense, SquareMatrixIsInverseRowPermutable) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsPermutable) { 
using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; - auto inverse_row_permute = - gko::as(this->mtx5->inverse_row_permute(&inverse_permute_idxs)); + auto ref_permuted = + gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->permute(&permute_idxs)); - GKO_ASSERT_MTX_NEAR( - inverse_row_permute, - l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), 0.0); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsInverseRowPermutable) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx4->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 0}}; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - auto inverse_row_permute = - gko::as(this->mtx4->inverse_row_permute(&inverse_permute_idxs)); + auto ref_permuted = + gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + this->mtx5->permute(&permute_idxs, permuted); - GKO_ASSERT_MTX_NEAR(inverse_row_permute, - l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, SquareMatrixIsInverseRowPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + gko::array permute_idxs{exec, {1, 0}}; + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); - this->mtx5->inverse_row_permute(&permute_idxs, row_permute); + auto ref_permuted = + gko::as(gko::as(mtx->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + mtx->permute(&permute_idxs, permuted); - GKO_ASSERT_MTX_NEAR( - row_permute, - l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), 0.0); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + ASSERT_EQ(permuted->get_stride(), 4); } -TYPED_TEST(Dense, SquareSubmatrixIsInverseRowPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixPermuteIntoDenseFails) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - - this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->inverse_row_permute(&permute_idxs, row_permute); + using index_type = typename TestFixture::index_type; + auto exec = this->mtx4->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; - GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), 0.0); - ASSERT_EQ(row_permute->get_stride(), 4); + ASSERT_THROW(this->mtx4->permute(&permute_idxs, this->mtx4->clone()), + gko::DimensionMismatch); } -TYPED_TEST(Dense, - SquareMatrixInverseRowPermuteIntoDenseFailsForWrongPermutationSize) 
+TYPED_TEST(DenseWithIndexType, + SquareMatrixPermuteIntoDenseFailsForWrongPermutationSize) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + gko::array permute_idxs{exec, {1, 2}}; - ASSERT_THROW(this->mtx5->inverse_row_permute(&permute_idxs, row_permute), + ASSERT_THROW(this->mtx5->permute(&permute_idxs, this->mtx5->clone()), gko::ValueMismatch); } -TYPED_TEST(Dense, SquareMatrixInverseRowPermuteIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, + SquareMatrixPermuteIntoDenseFailsForWrongDimensions) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; - ASSERT_THROW( - this->mtx5->inverse_row_permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); + ASSERT_THROW(this->mtx5->permute(&permute_idxs, Mtx::create(exec)), + gko::DimensionMismatch); } -TYPED_TEST(Dense, SquareMatrixIsInverseColPermutable) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInversePermutable) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; - auto inverse_c_permute = - gko::as(this->mtx5->inverse_column_permute(&inverse_permute_idxs)); + auto ref_permuted = gko::as( + gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->inverse_permute(&permute_idxs)); - GKO_ASSERT_MTX_NEAR( - inverse_c_permute, - l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), 0.0); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsInverseColPermutable) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInversePermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx4->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 2, 0}}; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - auto inverse_c_permute = - gko::as(this->mtx4->inverse_column_permute(&inverse_permute_idxs)); + auto ref_permuted = gko::as( + gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + this->mtx5->inverse_permute(&permute_idxs, permuted); - GKO_ASSERT_MTX_NEAR(inverse_c_permute, - l({{2.0, 1.0, 3.0}, {0.0, 0.0, 5.0}}), 0.0); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, SquareMatrixIsInverseColPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInversePermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - auto c_permute = Mtx::create(exec, this->mtx5->get_size()); + gko::array permute_idxs{exec, {1, 0}}; + auto permuted = Mtx::create(exec, 
gko::dim<2>{2, 2}, 4); + auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); - this->mtx5->inverse_column_permute(&permute_idxs, c_permute); + auto ref_permuted = + gko::as(gko::as(mtx->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + mtx->inverse_permute(&permute_idxs, permuted); - GKO_ASSERT_MTX_NEAR( - c_permute, l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), - 0.0); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + ASSERT_EQ(permuted->get_stride(), 4); } -TYPED_TEST(Dense, SquareSubmatrixIsInverseColPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixInversePermuteIntoDenseFails) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - - this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->column_permute(&permute_idxs, c_permute); + using index_type = typename TestFixture::index_type; + auto exec = this->mtx4->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; - GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), 0.0); - ASSERT_EQ(c_permute->get_stride(), 4); + ASSERT_THROW( + this->mtx4->inverse_permute(&permute_idxs, this->mtx4->clone()), + gko::DimensionMismatch); } -TYPED_TEST(Dense, - SquareMatrixInverseColPermuteIntoDenseFailsForWrongPermutationSize) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInversePermuteIntoDenseFailsForWrongPermutationSize) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + gko::array permute_idxs{exec, {0, 1}}; - ASSERT_THROW(this->mtx5->inverse_column_permute(&permute_idxs, row_permute), - gko::ValueMismatch); + ASSERT_THROW( + this->mtx5->inverse_permute(&permute_idxs, this->mtx5->clone()), + gko::ValueMismatch); } -TYPED_TEST(Dense, SquareMatrixInverseColPermuteIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInversePermuteIntoDenseFailsForWrongDimensions) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; - ASSERT_THROW( - this->mtx5->inverse_column_permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); + ASSERT_THROW(this->mtx5->inverse_permute(&permute_idxs, Mtx::create(exec)), + gko::DimensionMismatch); } -TYPED_TEST(Dense, SquareMatrixIsRowPermutable64) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsRowPermutable) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; auto row_permute = gko::as(this->mtx5->row_permute(&permute_idxs)); GKO_ASSERT_MTX_NEAR( row_permute, - l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); + l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), + 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsRowPermutable64) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsRowPermutable) { using Mtx = 
typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; + gko::array permute_idxs{exec, {1, 0}}; auto row_permute = gko::as(this->mtx4->row_permute(&permute_idxs)); - GKO_ASSERT_MTX_NEAR(row_permute, l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), - 0.0); + GKO_ASSERT_MTX_NEAR(row_permute, + l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); } -TYPED_TEST(Dense, SquareMatrixIsRowPermutableIntoDense64) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsRowPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; auto row_permute = Mtx::create(exec, this->mtx5->get_size()); this->mtx5->row_permute(&permute_idxs, row_permute); GKO_ASSERT_MTX_NEAR( row_permute, - l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); + l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), + 0.0); } -TYPED_TEST(Dense, SquareSubmatrixIsRowPermutableIntoDense64) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsRowPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; + gko::array permute_idxs{exec, {1, 0}}; auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); this->mtx5->create_submatrix({0, 2}, {0, 2}) ->row_permute(&permute_idxs, row_permute); - GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), 0.0); + GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), + 0.0); ASSERT_EQ(row_permute->get_stride(), 4); } -TYPED_TEST(Dense, SquareMatrixRowPermuteIntoDenseFailsForWrongPermutationSize64) +TYPED_TEST(DenseWithIndexType, + SquareMatrixRowPermuteIntoDenseFailsForWrongPermutationSize) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; + gko::array permute_idxs{exec, {1, 2}}; auto row_permute = Mtx::create(exec, this->mtx5->get_size()); ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, row_permute), @@ -3121,85 +2138,94 @@ TYPED_TEST(Dense, SquareMatrixRowPermuteIntoDenseFailsForWrongPermutationSize64) } -TYPED_TEST(Dense, SquareMatrixRowPermuteIntoDenseFailsForWrongDimensions64) +TYPED_TEST(DenseWithIndexType, + SquareMatrixRowPermuteIntoDenseFailsForWrongDimensions) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, Mtx::create(exec)), gko::DimensionMismatch); } -TYPED_TEST(Dense, SquareMatrixIsColPermutable64) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsColPermutable) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type 
= typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; auto c_permute = gko::as(this->mtx5->column_permute(&permute_idxs)); GKO_ASSERT_MTX_NEAR( - c_permute, l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), + c_permute, + l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsColPermutable64) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsColPermutable) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; auto c_permute = gko::as(this->mtx4->column_permute(&permute_idxs)); - GKO_ASSERT_MTX_NEAR(c_permute, l({{3.0, 2.0, 1.0}, {5.0, 0.0, 0.0}}), - 0.0); + GKO_ASSERT_MTX_NEAR(c_permute, + l({{3.0, 2.0, 1.0}, {5.0, 0.0, 0.0}}), 0.0); } -TYPED_TEST(Dense, SquareMatrixIsColPermutableIntoDense64) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsColPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; auto c_permute = Mtx::create(exec, this->mtx5->get_size()); this->mtx5->column_permute(&permute_idxs, c_permute); GKO_ASSERT_MTX_NEAR( - c_permute, l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), + c_permute, + l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), 0.0); } -TYPED_TEST(Dense, SquareSubmatrixIsColPermutableIntoDense64) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsColPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; + gko::array permute_idxs{exec, {1, 0}}; auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); this->mtx5->create_submatrix({0, 2}, {0, 2}) ->column_permute(&permute_idxs, c_permute); - GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), 0.0); + GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), + 0.0); ASSERT_EQ(c_permute->get_stride(), 4); } -TYPED_TEST(Dense, SquareMatrixColPermuteIntoDenseFailsForWrongPermutationSize64) +TYPED_TEST(DenseWithIndexType, + SquareMatrixColPermuteIntoDenseFailsForWrongPermutationSize) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; + gko::array permute_idxs{exec, {1, 2}}; auto row_permute = Mtx::create(exec, this->mtx5->get_size()); ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, row_permute), @@ -3207,88 +2233,96 @@ TYPED_TEST(Dense, SquareMatrixColPermuteIntoDenseFailsForWrongPermutationSize64) } -TYPED_TEST(Dense, SquareMatrixColPermuteIntoDenseFailsForWrongDimensions64) +TYPED_TEST(DenseWithIndexType, + SquareMatrixColPermuteIntoDenseFailsForWrongDimensions) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using 
index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, Mtx::create(exec)), gko::DimensionMismatch); } -TYPED_TEST(Dense, SquareMatrixIsInverseRowPermutable64) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseRowPermutable) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 2, 0}}; + gko::array inverse_permute_idxs{exec, {1, 2, 0}}; - auto inverse_row_permute = + auto inv_row_permute = gko::as(this->mtx5->inverse_row_permute(&inverse_permute_idxs)); GKO_ASSERT_MTX_NEAR( - inverse_row_permute, - l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), 0.0); + inv_row_permute, + l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), + 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsInverseRowPermutable64) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsInverseRowPermutable) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx4->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 0}}; + gko::array inverse_permute_idxs{exec, {1, 0}}; auto inverse_row_permute = gko::as(this->mtx4->inverse_row_permute(&inverse_permute_idxs)); GKO_ASSERT_MTX_NEAR(inverse_row_permute, - l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); + l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); } -TYPED_TEST(Dense, SquareMatrixIsInverseRowPermutableIntoDense64) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseRowPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; auto row_permute = Mtx::create(exec, this->mtx5->get_size()); this->mtx5->inverse_row_permute(&permute_idxs, row_permute); GKO_ASSERT_MTX_NEAR( row_permute, - l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), 0.0); + l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), + 0.0); } -TYPED_TEST(Dense, SquareSubmatrixIsInverseRowPermutableIntoDense64) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInverseRowPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; + gko::array permute_idxs{exec, {1, 0}}; auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); this->mtx5->create_submatrix({0, 2}, {0, 2}) ->inverse_row_permute(&permute_idxs, row_permute); - GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), 0.0); + GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), + 0.0); ASSERT_EQ(row_permute->get_stride(), 4); } -TYPED_TEST(Dense, - SquareMatrixInverseRowPermuteIntoDenseFailsForWrongPermutationSize64) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInverseRowPermuteIntoDenseFailsForWrongPermutationSize) { using Mtx = typename 
TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; + gko::array permute_idxs{exec, {1, 2}}; auto row_permute = Mtx::create(exec, this->mtx5->get_size()); ASSERT_THROW(this->mtx5->inverse_row_permute(&permute_idxs, row_permute), @@ -3296,13 +2330,13 @@ TYPED_TEST(Dense, } -TYPED_TEST(Dense, - SquareMatrixInverseRowPermuteIntoDenseFailsForWrongDimensions64) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInverseRowPermuteIntoDenseFailsForWrongDimensions) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; ASSERT_THROW( this->mtx5->inverse_row_permute(&permute_idxs, Mtx::create(exec)), @@ -3310,76 +2344,83 @@ TYPED_TEST(Dense, } -TYPED_TEST(Dense, SquareMatrixIsInverseColPermutable64) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseColPermutable) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 2, 0}}; + gko::array inverse_permute_idxs{exec, {1, 2, 0}}; - auto inverse_c_permute = + auto inv_c_permute = gko::as(this->mtx5->inverse_column_permute(&inverse_permute_idxs)); GKO_ASSERT_MTX_NEAR( - inverse_c_permute, - l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), 0.0); + inv_c_permute, + l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), + 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsInverseColPermutable64) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsInverseColPermutable) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx4->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 2, 0}}; + gko::array inverse_permute_idxs{exec, {1, 2, 0}}; auto inverse_c_permute = gko::as(this->mtx4->inverse_column_permute(&inverse_permute_idxs)); GKO_ASSERT_MTX_NEAR(inverse_c_permute, - l({{2.0, 1.0, 3.0}, {0.0, 0.0, 5.0}}), 0.0); + l({{2.0, 1.0, 3.0}, {0.0, 0.0, 5.0}}), 0.0); } -TYPED_TEST(Dense, SquareMatrixIsInverseColPermutableIntoDense64) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseColPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; auto c_permute = Mtx::create(exec, this->mtx5->get_size()); this->mtx5->inverse_column_permute(&permute_idxs, c_permute); GKO_ASSERT_MTX_NEAR( - c_permute, l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), + c_permute, + l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), 0.0); } -TYPED_TEST(Dense, SquareSubmatrixIsInverseColPermutableIntoDense64) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInverseColPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using value_type = typename TestFixture::value_type; + using index_type 
= typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; + gko::array permute_idxs{exec, {1, 0}}; auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); this->mtx5->create_submatrix({0, 2}, {0, 2}) ->column_permute(&permute_idxs, c_permute); - GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), 0.0); + GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), + 0.0); ASSERT_EQ(c_permute->get_stride(), 4); } -TYPED_TEST(Dense, - SquareMatrixInverseColPermuteIntoDenseFailsForWrongPermutationSize64) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInverseColPermuteIntoDenseFailsForWrongPermutationSize) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; + gko::array permute_idxs{exec, {1, 2}}; auto row_permute = Mtx::create(exec, this->mtx5->get_size()); ASSERT_THROW(this->mtx5->inverse_column_permute(&permute_idxs, row_permute), @@ -3387,13 +2428,13 @@ TYPED_TEST(Dense, } -TYPED_TEST(Dense, - SquareMatrixInverseColPermuteIntoDenseFailsForWrongDimensions64) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInverseColPermuteIntoDenseFailsForWrongDimensions) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 2, 0}}; ASSERT_THROW( this->mtx5->inverse_column_permute(&permute_idxs, Mtx::create(exec)), From 6fbcd9f744f95adea22ffef60c162078e16aca9c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 16 Oct 2023 10:43:50 +0200 Subject: [PATCH 382/583] group tests by fixture --- reference/test/matrix/dense_kernels.cpp | 2582 +++++++++++------------ 1 file changed, 1291 insertions(+), 1291 deletions(-) diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 3a4cfb6826b..56f082243e6 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -114,23 +114,6 @@ class Dense : public ::testing::Test { TYPED_TEST_SUITE(Dense, gko::test::ValueTypes, TypenameNameGenerator); -template -class DenseWithIndexType - : public Dense< - typename std::tuple_element<0, decltype(ValueIndexType())>::type> { -public: - using value_type = - typename std::tuple_element<0, decltype(ValueIndexType())>::type; - using index_type = - typename std::tuple_element<1, decltype(ValueIndexType())>::type; - - index_type invalid_index = gko::invalid_index(); -}; - -TYPED_TEST_SUITE(DenseWithIndexType, gko::test::ValueIndexTypes, - PairTypenameNameGenerator); - - TYPED_TEST(Dense, CopyRespectsStride) { using value_type = typename TestFixture::value_type; @@ -796,2086 +779,2103 @@ TYPED_TEST(Dense, MovesToPrecision) } -template -void assert_coo_eq_mtx4(const gko::matrix::Coo* coo_mtx) +TYPED_TEST(Dense, SquareMatrixIsTransposable) { - auto v = coo_mtx->get_const_values(); - auto c = coo_mtx->get_const_col_idxs(); - auto r = coo_mtx->get_const_row_idxs(); + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto trans = gko::as(this->mtx5->transpose()); - ASSERT_EQ(coo_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(coo_mtx->get_num_stored_elements(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - 
EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], ValueType{1.0}); - EXPECT_EQ(v[1], ValueType{3.0}); - EXPECT_EQ(v[2], ValueType{2.0}); - EXPECT_EQ(v[3], ValueType{5.0}); + GKO_ASSERT_MTX_NEAR( + trans, l({{1.0, -2.0, 2.1}, {-1.0, 2.0, 3.4}, {-0.5, 4.5, 1.2}}), + 0.0); } -TYPED_TEST(DenseWithIndexType, ConvertsToCoo) +TYPED_TEST(Dense, SquareMatrixIsTransposableIntoDense) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Coo = typename gko::matrix::Coo; - auto coo_mtx = Coo::create(this->mtx4->get_executor()); + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto trans = Mtx::create(this->exec, this->mtx5->get_size()); - this->mtx4->convert_to(coo_mtx); + this->mtx5->transpose(trans); - assert_coo_eq_mtx4(coo_mtx.get()); + GKO_ASSERT_MTX_NEAR( + trans, l({{1.0, -2.0, 2.1}, {-1.0, 2.0, 3.4}, {-0.5, 4.5, 1.2}}), + 0.0); } -TYPED_TEST(DenseWithIndexType, MovesToCoo) +TYPED_TEST(Dense, SquareSubmatrixIsTransposableIntoDense) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Coo = typename gko::matrix::Coo; - auto coo_mtx = Coo::create(this->mtx4->get_executor()); + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto trans = Mtx::create(this->exec, gko::dim<2>{2, 2}, 4); - this->mtx4->move_to(coo_mtx); + this->mtx5->create_submatrix({0, 2}, {0, 2})->transpose(trans); - assert_coo_eq_mtx4(coo_mtx.get()); + GKO_ASSERT_MTX_NEAR(trans, l({{1.0, -2.0}, {-1.0, 2.0}}), 0.0); + ASSERT_EQ(trans->get_stride(), 4); } -template -void assert_csr_eq_mtx4(const gko::matrix::Csr* csr_mtx) +TYPED_TEST(Dense, SquareMatrixIsTransposableIntoDenseFailsForWrongDimensions) { - auto v = csr_mtx->get_const_values(); - auto c = csr_mtx->get_const_col_idxs(); - auto r = csr_mtx->get_const_row_ptrs(); - ASSERT_EQ(csr_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(csr_mtx->get_num_stored_elements(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 3); - EXPECT_EQ(r[2], 4); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], ValueType{1.0}); - EXPECT_EQ(v[1], ValueType{3.0}); - EXPECT_EQ(v[2], ValueType{2.0}); - EXPECT_EQ(v[3], ValueType{5.0}); + using Mtx = typename TestFixture::Mtx; + + ASSERT_THROW(this->mtx5->transpose(Mtx::create(this->exec)), + gko::DimensionMismatch); } -TYPED_TEST(DenseWithIndexType, ConvertsToCsr) +TYPED_TEST(Dense, NonSquareMatrixIsTransposable) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Csr = typename gko::matrix::Csr; - auto csr_s_classical = std::make_shared(); - auto csr_s_merge = std::make_shared(); - auto csr_mtx_c = Csr::create(this->mtx4->get_executor(), csr_s_classical); - auto csr_mtx_m = Csr::create(this->mtx4->get_executor(), csr_s_merge); - - this->mtx4->convert_to(csr_mtx_c); - this->mtx4->convert_to(csr_mtx_m); + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto trans = gko::as(this->mtx4->transpose()); - assert_csr_eq_mtx4(csr_mtx_c.get()); - ASSERT_EQ(csr_mtx_c->get_strategy()->get_name(), "classical"); - GKO_ASSERT_MTX_NEAR(csr_mtx_c, csr_mtx_m, 0.0); - ASSERT_EQ(csr_mtx_m->get_strategy()->get_name(), "merge_path"); + GKO_ASSERT_MTX_NEAR(trans, l({{1.0, 0.0}, {3.0, 5.0}, {2.0, 0.0}}), 0.0); } -TYPED_TEST(DenseWithIndexType, MovesToCsr) +TYPED_TEST(Dense, 
NonSquareMatrixIsTransposableIntoDense) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Csr = typename gko::matrix::Csr; - auto csr_s_classical = std::make_shared(); - auto csr_s_merge = std::make_shared(); - auto csr_mtx_c = Csr::create(this->mtx4->get_executor(), csr_s_classical); - auto csr_mtx_m = Csr::create(this->mtx4->get_executor(), csr_s_merge); - auto mtx_clone = this->mtx4->clone(); + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto trans = + Mtx::create(this->exec, gko::transpose(this->mtx4->get_size())); - this->mtx4->move_to(csr_mtx_c); - mtx_clone->move_to(csr_mtx_m); + this->mtx4->transpose(trans); - assert_csr_eq_mtx4(csr_mtx_c.get()); - ASSERT_EQ(csr_mtx_c->get_strategy()->get_name(), "classical"); - GKO_ASSERT_MTX_NEAR(csr_mtx_c, csr_mtx_m, 0.0); - ASSERT_EQ(csr_mtx_m->get_strategy()->get_name(), "merge_path"); + GKO_ASSERT_MTX_NEAR(trans, l({{1.0, 0.0}, {3.0, 5.0}, {2.0, 0.0}}), 0.0); } -template -void assert_sparsity_csr_eq_mtx4( - const gko::matrix::SparsityCsr* sparsity_csr_mtx) +TYPED_TEST(Dense, NonSquareSubmatrixIsTransposableIntoDense) { - auto v = sparsity_csr_mtx->get_const_value(); - auto c = sparsity_csr_mtx->get_const_col_idxs(); - auto r = sparsity_csr_mtx->get_const_row_ptrs(); + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto trans = Mtx::create(this->exec, gko::dim<2>{2, 1}, 5); - ASSERT_EQ(sparsity_csr_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(sparsity_csr_mtx->get_num_nonzeros(), 4); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 3); - EXPECT_EQ(r[2], 4); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], ValueType{1.0}); + this->mtx4->create_submatrix({0, 1}, {0, 2})->transpose(trans); + + GKO_ASSERT_MTX_NEAR(trans, l({1.0, 3.0}), 0.0); + ASSERT_EQ(trans->get_stride(), 5); } -TYPED_TEST(DenseWithIndexType, ConvertsToSparsityCsr) +TYPED_TEST(Dense, NonSquareMatrixIsTransposableIntoDenseFailsForWrongDimensions) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using SparsityCsr = - typename gko::matrix::SparsityCsr; - auto sparsity_csr_mtx = SparsityCsr::create(this->mtx4->get_executor()); - - this->mtx4->convert_to(sparsity_csr_mtx); + using Mtx = typename TestFixture::Mtx; - assert_sparsity_csr_eq_mtx4(sparsity_csr_mtx.get()); + ASSERT_THROW(this->mtx4->transpose(Mtx::create(this->exec)), + gko::DimensionMismatch); } -TYPED_TEST(DenseWithIndexType, MovesToSparsityCsr) +TYPED_TEST(Dense, ExtractsDiagonalFromSquareMatrix) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using SparsityCsr = - typename gko::matrix::SparsityCsr; - auto sparsity_csr_mtx = SparsityCsr::create(this->mtx4->get_executor()); + using T = typename TestFixture::value_type; - this->mtx4->move_to(sparsity_csr_mtx); + auto diag = this->mtx5->extract_diagonal(); - assert_sparsity_csr_eq_mtx4(sparsity_csr_mtx.get()); + ASSERT_EQ(diag->get_size()[0], 3); + ASSERT_EQ(diag->get_size()[1], 3); + ASSERT_EQ(diag->get_values()[0], T{1.}); + ASSERT_EQ(diag->get_values()[1], T{2.}); + ASSERT_EQ(diag->get_values()[2], T{1.2}); } -template -void assert_ell_eq_mtx6(const gko::matrix::Ell* ell_mtx) +TYPED_TEST(Dense, ExtractsDiagonalFromTallSkinnyMatrix) { - auto v = ell_mtx->get_const_values(); - auto c = ell_mtx->get_const_col_idxs(); + using T = typename 
TestFixture::value_type; - ASSERT_EQ(ell_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(ell_mtx->get_num_stored_elements_per_row(), 2); - ASSERT_EQ(ell_mtx->get_num_stored_elements(), 4); - ASSERT_EQ(ell_mtx->get_stride(), 2); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], gko::invalid_index()); - EXPECT_EQ(v[0], ValueType{1.0}); - EXPECT_EQ(v[1], ValueType{1.5}); - EXPECT_EQ(v[2], ValueType{2.0}); - EXPECT_EQ(v[3], ValueType{0.0}); + auto diag = this->mtx4->extract_diagonal(); + + ASSERT_EQ(diag->get_size()[0], 2); + ASSERT_EQ(diag->get_size()[1], 2); + ASSERT_EQ(diag->get_values()[0], T{1.}); + ASSERT_EQ(diag->get_values()[1], T{5.}); } -TYPED_TEST(DenseWithIndexType, ConvertsToEll) +TYPED_TEST(Dense, ExtractsDiagonalFromShortFatMatrix) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Ell = typename gko::matrix::Ell; - auto ell_mtx = Ell::create(this->mtx6->get_executor()); + using T = typename TestFixture::value_type; - this->mtx6->convert_to(ell_mtx); + auto diag = this->mtx8->extract_diagonal(); - assert_ell_eq_mtx6(ell_mtx.get()); + ASSERT_EQ(diag->get_size()[0], 2); + ASSERT_EQ(diag->get_size()[1], 2); + ASSERT_EQ(diag->get_values()[0], T{1.}); + ASSERT_EQ(diag->get_values()[1], T{2.}); } -TYPED_TEST(DenseWithIndexType, MovesToEll) +TYPED_TEST(Dense, ExtractsDiagonalFromSquareMatrixIntoDiagonal) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Ell = typename gko::matrix::Ell; - auto ell_mtx = Ell::create(this->mtx6->get_executor()); + using T = typename TestFixture::value_type; + auto diag = gko::matrix::Diagonal::create(this->exec, 3); - this->mtx6->move_to(ell_mtx); + this->mtx5->extract_diagonal(diag); - assert_ell_eq_mtx6(ell_mtx.get()); + ASSERT_EQ(diag->get_size()[0], 3); + ASSERT_EQ(diag->get_size()[1], 3); + ASSERT_EQ(diag->get_values()[0], T{1.}); + ASSERT_EQ(diag->get_values()[1], T{2.}); + ASSERT_EQ(diag->get_values()[2], T{1.2}); } -template -void assert_strided_ell_eq_mtx6( - const gko::matrix::Ell* ell_mtx) +TYPED_TEST(Dense, ExtractsDiagonalFromTallSkinnyMatrixIntoDiagonal) { - constexpr auto invalid_index = gko::invalid_index(); - auto v = ell_mtx->get_const_values(); - auto c = ell_mtx->get_const_col_idxs(); + using T = typename TestFixture::value_type; + auto diag = gko::matrix::Diagonal::create(this->exec, 2); - ASSERT_EQ(ell_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(ell_mtx->get_num_stored_elements_per_row(), 2); - ASSERT_EQ(ell_mtx->get_num_stored_elements(), 6); - ASSERT_EQ(ell_mtx->get_stride(), 3); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], invalid_index); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(c[4], invalid_index); - EXPECT_EQ(c[5], invalid_index); - EXPECT_EQ(v[0], ValueType{1.0}); - EXPECT_EQ(v[1], ValueType{1.5}); - EXPECT_EQ(v[2], ValueType{0.0}); - EXPECT_EQ(v[3], ValueType{2.0}); - EXPECT_EQ(v[4], ValueType{0.0}); - EXPECT_EQ(v[5], ValueType{0.0}); + this->mtx4->extract_diagonal(diag); + + ASSERT_EQ(diag->get_size()[0], 2); + ASSERT_EQ(diag->get_size()[1], 2); + ASSERT_EQ(diag->get_values()[0], T{1.}); + ASSERT_EQ(diag->get_values()[1], T{5.}); } -TYPED_TEST(DenseWithIndexType, ConvertsToEllWithStride) +TYPED_TEST(Dense, ExtractsDiagonalFromShortFatMatrixIntoDiagonal) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Ell = typename gko::matrix::Ell; - auto ell_mtx = - 
Ell::create(this->mtx6->get_executor(), gko::dim<2>{2, 3}, 2, 3); + using T = typename TestFixture::value_type; + auto diag = gko::matrix::Diagonal::create(this->exec, 2); - this->mtx6->convert_to(ell_mtx); + this->mtx8->extract_diagonal(diag); - assert_strided_ell_eq_mtx6(ell_mtx.get()); + ASSERT_EQ(diag->get_size()[0], 2); + ASSERT_EQ(diag->get_size()[1], 2); + ASSERT_EQ(diag->get_values()[0], T{1.}); + ASSERT_EQ(diag->get_values()[1], T{2.}); } -TYPED_TEST(DenseWithIndexType, MovesToEllWithStride) +TYPED_TEST(Dense, InplaceAbsolute) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Ell = typename gko::matrix::Ell; - auto ell_mtx = - Ell::create(this->mtx6->get_executor(), gko::dim<2>{2, 3}, 2, 3); + using T = typename TestFixture::value_type; - this->mtx6->move_to(ell_mtx); + this->mtx5->compute_absolute_inplace(); - assert_strided_ell_eq_mtx6(ell_mtx.get()); + GKO_ASSERT_MTX_NEAR( + this->mtx5, l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), + 0.0); } -template -void assert_hybrid_auto_eq_mtx4( - const gko::matrix::Hybrid* hybrid_mtx) +TYPED_TEST(Dense, InplaceAbsoluteSubMatrix) { - auto v = hybrid_mtx->get_const_coo_values(); - auto c = hybrid_mtx->get_const_coo_col_idxs(); - auto r = hybrid_mtx->get_const_coo_row_idxs(); - auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); - auto p = hybrid_mtx->get_ell_stride(); + using T = typename TestFixture::value_type; + auto mtx = this->mtx5->create_submatrix(gko::span{0, 2}, gko::span{0, 2}); - ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 0); - ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 4); - EXPECT_EQ(n, 0); - EXPECT_EQ(p, 2); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], ValueType{1.0}); - EXPECT_EQ(v[1], ValueType{3.0}); - EXPECT_EQ(v[2], ValueType{2.0}); - EXPECT_EQ(v[3], ValueType{5.0}); + mtx->compute_absolute_inplace(); + + GKO_ASSERT_MTX_NEAR( + this->mtx5, l({{1.0, 1.0, -0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), + 0.0); } -TYPED_TEST(DenseWithIndexType, MovesToHybridAutomatically) +TYPED_TEST(Dense, OutplaceAbsolute) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor()); + using T = typename TestFixture::value_type; - this->mtx4->move_to(hybrid_mtx); + auto abs_mtx = this->mtx5->compute_absolute(); - assert_hybrid_auto_eq_mtx4(hybrid_mtx.get()); + GKO_ASSERT_MTX_NEAR( + abs_mtx, l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), + 0.0); } -TYPED_TEST(DenseWithIndexType, ConvertsToHybridAutomatically) +TYPED_TEST(Dense, OutplaceAbsoluteIntoDense) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor()); + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto abs_mtx = + gko::remove_complex::create(this->exec, this->mtx5->get_size()); - this->mtx4->convert_to(hybrid_mtx); + this->mtx5->compute_absolute(abs_mtx); - assert_hybrid_auto_eq_mtx4(hybrid_mtx.get()); + GKO_ASSERT_MTX_NEAR( + abs_mtx, l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), + 0.0); } 
-template -void assert_hybrid_strided_eq_mtx4( - const gko::matrix::Hybrid* hybrid_mtx) +TYPED_TEST(Dense, OutplaceAbsoluteSubMatrix) { - auto v = hybrid_mtx->get_const_coo_values(); - auto c = hybrid_mtx->get_const_coo_col_idxs(); - auto r = hybrid_mtx->get_const_coo_row_idxs(); - auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); - auto p = hybrid_mtx->get_ell_stride(); + using T = typename TestFixture::value_type; + auto mtx = this->mtx5->create_submatrix(gko::span{0, 2}, gko::span{0, 2}); - ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 0); - ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 4); - EXPECT_EQ(n, 0); - EXPECT_EQ(p, 3); - EXPECT_EQ(r[0], 0); - EXPECT_EQ(r[1], 0); - EXPECT_EQ(r[2], 0); - EXPECT_EQ(r[3], 1); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 2); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(v[0], ValueType{1.0}); - EXPECT_EQ(v[1], ValueType{3.0}); - EXPECT_EQ(v[2], ValueType{2.0}); - EXPECT_EQ(v[3], ValueType{5.0}); + auto abs_mtx = mtx->compute_absolute(); + + GKO_ASSERT_MTX_NEAR(abs_mtx, l({{1.0, 1.0}, {2.0, 2.0}}), 0); + GKO_ASSERT_EQ(abs_mtx->get_stride(), 2); } -TYPED_TEST(DenseWithIndexType, MovesToHybridWithStrideAutomatically) +TYPED_TEST(Dense, OutplaceSubmatrixAbsoluteIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto mtx = this->mtx5->create_submatrix(gko::span{0, 2}, gko::span{0, 2}); + auto abs_mtx = + gko::remove_complex::create(this->exec, gko::dim<2>{2, 2}, 4); + + mtx->compute_absolute(abs_mtx); + + GKO_ASSERT_MTX_NEAR(abs_mtx, l({{1.0, 1.0}, {2.0, 2.0}}), 0); + GKO_ASSERT_EQ(abs_mtx->get_stride(), 4); +} + + +TYPED_TEST(Dense, AppliesToComplex) { using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = - Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 0, 3); + using complex_type = gko::to_complex; + using Vec = gko::matrix::Dense; + auto exec = gko::ReferenceExecutor::create(); + auto b = + gko::initialize({{complex_type{1.0, 0.0}, complex_type{2.0, 1.0}}, + {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}, + {complex_type{3.0, 4.0}, complex_type{4.0, 5.0}}}, + exec); + auto x = Vec::create(exec, gko::dim<2>{2, 2}); - this->mtx4->move_to(hybrid_mtx); + this->mtx1->apply(b, x); - assert_hybrid_strided_eq_mtx4(hybrid_mtx.get()); + GKO_ASSERT_MTX_NEAR( + x, + l({{complex_type{14.0, 16.0}, complex_type{20.0, 22.0}}, + {complex_type{17.0, 19.0}, complex_type{24.5, 26.5}}}), + 0.0); } -TYPED_TEST(DenseWithIndexType, ConvertsToHybridWithStrideAutomatically) +TYPED_TEST(Dense, AppliesToMixedComplex) +{ + using mixed_value_type = + gko::next_precision; + using mixed_complex_type = gko::to_complex; + using Vec = gko::matrix::Dense; + auto exec = gko::ReferenceExecutor::create(); + auto b = gko::initialize( + {{mixed_complex_type{1.0, 0.0}, mixed_complex_type{2.0, 1.0}}, + {mixed_complex_type{2.0, 2.0}, mixed_complex_type{3.0, 3.0}}, + {mixed_complex_type{3.0, 4.0}, mixed_complex_type{4.0, 5.0}}}, + exec); + auto x = Vec::create(exec, gko::dim<2>{2, 2}); + + this->mtx1->apply(b, x); + + GKO_ASSERT_MTX_NEAR( + x, + l({{mixed_complex_type{14.0, 16.0}, mixed_complex_type{20.0, 22.0}}, + {mixed_complex_type{17.0, 19.0}, mixed_complex_type{24.5, 26.5}}}), + 0.0); +} + + +TYPED_TEST(Dense, AdvancedAppliesToComplex) { using value_type = typename TestFixture::value_type; - using index_type = typename 
TestFixture::index_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = - Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 0, 3); + using complex_type = gko::to_complex; + using Dense = gko::matrix::Dense; + using DenseComplex = gko::matrix::Dense; + auto exec = gko::ReferenceExecutor::create(); - this->mtx4->convert_to(hybrid_mtx); + auto b = gko::initialize( + {{complex_type{1.0, 0.0}, complex_type{2.0, 1.0}}, + {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}, + {complex_type{3.0, 4.0}, complex_type{4.0, 5.0}}}, + exec); + auto x = gko::initialize( + {{complex_type{1.0, 0.0}, complex_type{2.0, 1.0}}, + {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}}, + exec); + auto alpha = gko::initialize({-1.0}, this->exec); + auto beta = gko::initialize({2.0}, this->exec); - assert_hybrid_strided_eq_mtx4(hybrid_mtx.get()); + this->mtx1->apply(alpha, b, beta, x); + + GKO_ASSERT_MTX_NEAR( + x, + l({{complex_type{-12.0, -16.0}, complex_type{-16.0, -20.0}}, + {complex_type{-13.0, -15.0}, complex_type{-18.5, -20.5}}}), + 0.0); } -template -void assert_hybrid_limited_eq_mtx4( - const gko::matrix::Hybrid* hybrid_mtx) +TYPED_TEST(Dense, AdvancedAppliesToMixedComplex) { - constexpr auto invalid_index = gko::invalid_index(); - auto v = hybrid_mtx->get_const_ell_values(); - auto c = hybrid_mtx->get_const_ell_col_idxs(); - auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); - auto p = hybrid_mtx->get_ell_stride(); + using mixed_value_type = + gko::next_precision; + using mixed_complex_type = gko::to_complex; + using MixedDense = gko::matrix::Dense; + using MixedDenseComplex = gko::matrix::Dense; + auto exec = gko::ReferenceExecutor::create(); - ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 6); - ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 1); - EXPECT_EQ(n, 2); - EXPECT_EQ(p, 3); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], invalid_index); - EXPECT_EQ(c[3], 1); - EXPECT_EQ(c[4], invalid_index); - EXPECT_EQ(c[5], invalid_index); - EXPECT_EQ(v[0], ValueType{1.0}); - EXPECT_EQ(v[1], ValueType{5.0}); - EXPECT_EQ(v[2], ValueType{0.0}); - EXPECT_EQ(v[3], ValueType{3.0}); - EXPECT_EQ(v[4], ValueType{0.0}); - EXPECT_EQ(v[5], ValueType{0.0}); - EXPECT_EQ(hybrid_mtx->get_const_coo_values()[0], ValueType{2.0}); - EXPECT_EQ(hybrid_mtx->get_const_coo_row_idxs()[0], 0); - EXPECT_EQ(hybrid_mtx->get_const_coo_col_idxs()[0], 2); + auto b = gko::initialize( + {{mixed_complex_type{1.0, 0.0}, mixed_complex_type{2.0, 1.0}}, + {mixed_complex_type{2.0, 2.0}, mixed_complex_type{3.0, 3.0}}, + {mixed_complex_type{3.0, 4.0}, mixed_complex_type{4.0, 5.0}}}, + exec); + auto x = gko::initialize( + {{mixed_complex_type{1.0, 0.0}, mixed_complex_type{2.0, 1.0}}, + {mixed_complex_type{2.0, 2.0}, mixed_complex_type{3.0, 3.0}}}, + exec); + auto alpha = gko::initialize({-1.0}, this->exec); + auto beta = gko::initialize({2.0}, this->exec); + + this->mtx1->apply(alpha, b, beta, x); + + GKO_ASSERT_MTX_NEAR( + x, + l({{mixed_complex_type{-12.0, -16.0}, mixed_complex_type{-16.0, -20.0}}, + {mixed_complex_type{-13.0, -15.0}, + mixed_complex_type{-18.5, -20.5}}}), + 0.0); } -TYPED_TEST(DenseWithIndexType, MovesToHybridWithStrideAndCooLengthByColumns2) +TYPED_TEST(Dense, MakeComplex) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = - Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 2, 
3, 3, - std::make_shared(2)); + using T = typename TestFixture::value_type; - this->mtx4->move_to(hybrid_mtx); + auto complex_mtx = this->mtx5->make_complex(); - assert_hybrid_limited_eq_mtx4(hybrid_mtx.get()); + GKO_ASSERT_MTX_NEAR(complex_mtx, this->mtx5, 0.0); } -TYPED_TEST(DenseWithIndexType, ConvertsToHybridWithStrideAndCooLengthByColumns2) +TYPED_TEST(Dense, MakeComplexIntoDense) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = - Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 2, 3, 3, - std::make_shared(2)); + using T = typename TestFixture::value_type; + using ComplexMtx = typename TestFixture::ComplexMtx; + auto exec = this->mtx5->get_executor(); - this->mtx4->convert_to(hybrid_mtx); + auto complex_mtx = ComplexMtx::create(exec, this->mtx5->get_size()); + this->mtx5->make_complex(complex_mtx); - assert_hybrid_limited_eq_mtx4(hybrid_mtx.get()); + GKO_ASSERT_MTX_NEAR(complex_mtx, this->mtx5, 0.0); } -template -void assert_hybrid_percent_eq_mtx4( - const gko::matrix::Hybrid* hybrid_mtx) +TYPED_TEST(Dense, MakeComplexIntoDenseFailsForWrongDimensions) { - auto v = hybrid_mtx->get_const_ell_values(); - auto c = hybrid_mtx->get_const_ell_col_idxs(); - auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); - auto p = hybrid_mtx->get_ell_stride(); - auto coo_v = hybrid_mtx->get_const_coo_values(); - auto coo_c = hybrid_mtx->get_const_coo_col_idxs(); - auto coo_r = hybrid_mtx->get_const_coo_row_idxs(); + using T = typename TestFixture::value_type; + using ComplexMtx = typename TestFixture::ComplexMtx; + auto exec = this->mtx5->get_executor(); - ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 3); - EXPECT_EQ(n, 1); - EXPECT_EQ(p, 3); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], gko::invalid_index()); - EXPECT_EQ(v[0], ValueType{1.0}); - EXPECT_EQ(v[1], ValueType{5.0}); - EXPECT_EQ(v[2], ValueType{0.0}); - ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 2); - EXPECT_EQ(coo_v[0], ValueType{3.0}); - EXPECT_EQ(coo_v[1], ValueType{2.0}); - EXPECT_EQ(coo_c[0], 1); - EXPECT_EQ(coo_c[1], 2); - EXPECT_EQ(coo_r[0], 0); - EXPECT_EQ(coo_r[1], 0); + auto complex_mtx = ComplexMtx::create(exec); + + ASSERT_THROW(this->mtx5->make_complex(complex_mtx), gko::DimensionMismatch); } -TYPED_TEST(DenseWithIndexType, MovesToHybridWithStrideByPercent40) +TYPED_TEST(Dense, GetReal) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = - Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 1, 3, - std::make_shared(0.4)); + using T = typename TestFixture::value_type; - this->mtx4->move_to(hybrid_mtx); + auto real_mtx = this->mtx5->get_real(); - assert_hybrid_percent_eq_mtx4(hybrid_mtx.get()); + GKO_ASSERT_MTX_NEAR(real_mtx, this->mtx5, 0.0); } -TYPED_TEST(DenseWithIndexType, ConvertsToHybridWithStrideByPercent40) +TYPED_TEST(Dense, GetRealIntoDense) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Hybrid = typename gko::matrix::Hybrid; - auto hybrid_mtx = - Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 1, 3, - std::make_shared(0.4)); + using T = typename TestFixture::value_type; + using RealMtx = typename TestFixture::RealMtx; + auto exec = this->mtx5->get_executor(); - 
this->mtx4->convert_to(hybrid_mtx); + auto real_mtx = RealMtx::create(exec, this->mtx5->get_size()); + this->mtx5->get_real(real_mtx); - assert_hybrid_percent_eq_mtx4(hybrid_mtx.get()); + GKO_ASSERT_MTX_NEAR(real_mtx, this->mtx5, 0.0); } -template -void assert_sellp_eq_mtx7( - const gko::matrix::Sellp* sellp_mtx) +TYPED_TEST(Dense, GetRealIntoDenseFailsForWrongDimensions) { - constexpr auto invalid_index = gko::invalid_index(); - auto v = sellp_mtx->get_const_values(); - auto c = sellp_mtx->get_const_col_idxs(); - auto s = sellp_mtx->get_const_slice_sets(); - auto l = sellp_mtx->get_const_slice_lengths(); + using T = typename TestFixture::value_type; + using RealMtx = typename TestFixture::RealMtx; + auto exec = this->mtx5->get_executor(); - ASSERT_EQ(sellp_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(sellp_mtx->get_total_cols(), 3); - ASSERT_EQ(sellp_mtx->get_num_stored_elements(), - 3 * gko::matrix::default_slice_size); - ASSERT_EQ(sellp_mtx->get_slice_size(), gko::matrix::default_slice_size); - ASSERT_EQ(sellp_mtx->get_stride_factor(), - gko::matrix::default_stride_factor); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[gko::matrix::default_slice_size], 1); - EXPECT_EQ(c[gko::matrix::default_slice_size + 1], invalid_index); - EXPECT_EQ(c[2 * gko::matrix::default_slice_size], 2); - EXPECT_EQ(c[2 * gko::matrix::default_slice_size + 1], invalid_index); - EXPECT_EQ(v[0], ValueType{1.0}); - EXPECT_EQ(v[1], ValueType{1.5}); - EXPECT_EQ(v[gko::matrix::default_slice_size], ValueType{2.0}); - EXPECT_EQ(v[gko::matrix::default_slice_size + 1], ValueType{0.0}); - EXPECT_EQ(v[2 * gko::matrix::default_slice_size], ValueType{3.0}); - EXPECT_EQ(v[2 * gko::matrix::default_slice_size + 1], ValueType{0.0}); - EXPECT_EQ(s[0], 0); - EXPECT_EQ(s[1], 3); - EXPECT_EQ(l[0], 3); + auto real_mtx = RealMtx::create(exec); + ASSERT_THROW(this->mtx5->get_real(real_mtx), gko::DimensionMismatch); } -TYPED_TEST(DenseWithIndexType, ConvertsToSellp) +TYPED_TEST(Dense, GetImag) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Sellp = typename gko::matrix::Sellp; - auto sellp_mtx = Sellp::create(this->mtx7->get_executor()); + using T = typename TestFixture::value_type; - this->mtx7->convert_to(sellp_mtx); + auto imag_mtx = this->mtx5->get_imag(); - assert_sellp_eq_mtx7(sellp_mtx.get()); + GKO_ASSERT_MTX_NEAR( + imag_mtx, l({{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}), + 0.0); } -TYPED_TEST(DenseWithIndexType, MovesToSellp) +TYPED_TEST(Dense, GetImagIntoDense) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Sellp = typename gko::matrix::Sellp; - auto sellp_mtx = Sellp::create(this->mtx7->get_executor()); + using T = typename TestFixture::value_type; + using RealMtx = typename TestFixture::RealMtx; + auto exec = this->mtx5->get_executor(); - this->mtx7->move_to(sellp_mtx); + auto imag_mtx = RealMtx::create(exec, this->mtx5->get_size()); + this->mtx5->get_imag(imag_mtx); - assert_sellp_eq_mtx7(sellp_mtx.get()); + GKO_ASSERT_MTX_NEAR( + imag_mtx, l({{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}), + 0.0); } -template -void assert_sellp_strided_eq_mtx7( - const gko::matrix::Sellp* sellp_mtx) +TYPED_TEST(Dense, GetImagIntoDenseFailsForWrongDimensions) { - constexpr auto invalid_index = gko::invalid_index(); - auto v = sellp_mtx->get_const_values(); - auto c = sellp_mtx->get_const_col_idxs(); - auto s = sellp_mtx->get_const_slice_sets(); - auto l = 
sellp_mtx->get_const_slice_lengths(); + using T = typename TestFixture::value_type; + using RealMtx = typename TestFixture::RealMtx; + auto exec = this->mtx5->get_executor(); - ASSERT_EQ(sellp_mtx->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(sellp_mtx->get_total_cols(), 4); - ASSERT_EQ(sellp_mtx->get_num_stored_elements(), 8); - ASSERT_EQ(sellp_mtx->get_slice_size(), 2); - ASSERT_EQ(sellp_mtx->get_stride_factor(), 2); - EXPECT_EQ(c[0], 0); - EXPECT_EQ(c[1], 1); - EXPECT_EQ(c[2], 1); - EXPECT_EQ(c[3], invalid_index); - EXPECT_EQ(c[4], 2); - EXPECT_EQ(c[5], invalid_index); - EXPECT_EQ(c[6], invalid_index); - EXPECT_EQ(c[7], invalid_index); - EXPECT_EQ(v[0], ValueType{1.0}); - EXPECT_EQ(v[1], ValueType{1.5}); - EXPECT_EQ(v[2], ValueType{2.0}); - EXPECT_EQ(v[3], ValueType{0.0}); - EXPECT_EQ(v[4], ValueType{3.0}); - EXPECT_EQ(v[5], ValueType{0.0}); - EXPECT_EQ(v[6], ValueType{0.0}); - EXPECT_EQ(v[7], ValueType{0.0}); - EXPECT_EQ(s[0], 0); - EXPECT_EQ(s[1], 4); - EXPECT_EQ(l[0], 4); + auto imag_mtx = RealMtx::create(exec); + ASSERT_THROW(this->mtx5->get_imag(imag_mtx), gko::DimensionMismatch); } -TYPED_TEST(DenseWithIndexType, ConvertsToSellpWithSliceSizeAndStrideFactor) +TYPED_TEST(Dense, MakeTemporaryConversionDoesntConvertOnMatch) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Sellp = typename gko::matrix::Sellp; - auto sellp_mtx = - Sellp::create(this->mtx7->get_executor(), gko::dim<2>{}, 2, 2, 0); - - this->mtx7->convert_to(sellp_mtx); + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto alpha = gko::initialize({8.0}, this->exec); - assert_sellp_strided_eq_mtx7(sellp_mtx.get()); + ASSERT_EQ(gko::make_temporary_conversion(alpha).get(), alpha.get()); } -TYPED_TEST(DenseWithIndexType, MovesToSellpWithSliceSizeAndStrideFactor) +TYPED_TEST(Dense, MakeTemporaryConversionConvertsBack) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Sellp = typename gko::matrix::Sellp; - auto sellp_mtx = - Sellp::create(this->mtx7->get_executor(), gko::dim<2>{}, 2, 2, 0); + using MixedMtx = typename TestFixture::MixedMtx; + using T = typename TestFixture::value_type; + using MixedT = typename MixedMtx::value_type; + auto alpha = gko::initialize({8.0}, this->exec); - this->mtx7->move_to(sellp_mtx); + { + auto conversion = gko::make_temporary_conversion(alpha); + conversion->at(0, 0) = T{7.0}; + } - assert_sellp_strided_eq_mtx7(sellp_mtx.get()); + ASSERT_EQ(alpha->at(0, 0), MixedT{7.0}); } -TYPED_TEST(DenseWithIndexType, ConvertsToAndFromSellpWithMoreThanOneSlice) +TYPED_TEST(Dense, MakeTemporaryConversionConstDoesntConvertBack) { - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Mtx = typename TestFixture::Mtx; - using Sellp = typename gko::matrix::Sellp; - auto x = this->template gen_mtx(65, 25); + using MixedMtx = typename TestFixture::MixedMtx; + using T = typename TestFixture::value_type; + using MixedT = typename MixedMtx::value_type; + auto alpha = gko::initialize({8.0}, this->exec); - auto sellp_mtx = Sellp::create(this->exec); - auto dense_mtx = Mtx::create(this->exec); - x->convert_to(sellp_mtx); - sellp_mtx->convert_to(dense_mtx); + { + auto conversion = gko::make_temporary_conversion( + static_cast(alpha.get())); + alpha->at(0, 0) = MixedT{7.0}; + } - GKO_ASSERT_MTX_NEAR(dense_mtx, x, 0.0); + ASSERT_EQ(alpha->at(0, 0), MixedT{7.0}); } -TYPED_TEST(Dense, 
ConvertsEmptyToPrecision) +TYPED_TEST(Dense, ScaleAddIdentityRectangular) { - using Dense = typename TestFixture::Mtx; using T = typename TestFixture::value_type; - using OtherT = typename gko::next_precision; - using OtherDense = typename gko::matrix::Dense; - auto empty = OtherDense::create(this->exec); - auto res = Dense::create(this->exec); + using Vec = typename TestFixture::Mtx; + using MixedVec = typename TestFixture::MixedMtx; + auto alpha = gko::initialize({2.0}, this->exec); + auto beta = gko::initialize({-1.0}, this->exec); + auto b = gko::initialize( + {I{2.0, 0.0}, I{1.0, 2.5}, I{0.0, -4.0}}, this->exec); - empty->convert_to(res); + b->add_scaled_identity(alpha, beta); - ASSERT_FALSE(res->get_size()); + GKO_ASSERT_MTX_NEAR(b, l({{0.0, 0.0}, {-1.0, -0.5}, {0.0, 4.0}}), 0.0); } -TYPED_TEST(Dense, MovesEmptyToPrecision) -{ - using Dense = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using OtherT = typename gko::next_precision; - using OtherDense = typename gko::matrix::Dense; - auto empty = OtherDense::create(this->exec); - auto res = Dense::create(this->exec); +template +class DenseWithIndexType + : public Dense< + typename std::tuple_element<0, decltype(ValueIndexType())>::type> { +public: + using value_type = + typename std::tuple_element<0, decltype(ValueIndexType())>::type; + using index_type = + typename std::tuple_element<1, decltype(ValueIndexType())>::type; - empty->move_to(res); + index_type invalid_index = gko::invalid_index(); +}; - ASSERT_FALSE(res->get_size()); -} +TYPED_TEST_SUITE(DenseWithIndexType, gko::test::ValueIndexTypes, + PairTypenameNameGenerator); -TYPED_TEST(DenseWithIndexType, ConvertsEmptyToCoo) +template +void assert_coo_eq_mtx4(const gko::matrix::Coo* coo_mtx) { - using Dense = typename TestFixture::Mtx; - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Coo = typename gko::matrix::Coo; - auto empty = Dense::create(this->exec); - auto res = Coo::create(this->exec); - - empty->convert_to(res); + auto v = coo_mtx->get_const_values(); + auto c = coo_mtx->get_const_col_idxs(); + auto r = coo_mtx->get_const_row_idxs(); - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_FALSE(res->get_size()); + ASSERT_EQ(coo_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(coo_mtx->get_num_stored_elements(), 4); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(r[1], 0); + EXPECT_EQ(r[2], 0); + EXPECT_EQ(r[3], 1); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 2); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{3.0}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{5.0}); } -TYPED_TEST(DenseWithIndexType, MovesEmptyToCoo) +TYPED_TEST(DenseWithIndexType, ConvertsToCoo) { - using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; using Coo = typename gko::matrix::Coo; - auto empty = Dense::create(this->exec); - auto res = Coo::create(this->exec); + auto coo_mtx = Coo::create(this->mtx4->get_executor()); - empty->move_to(res); + this->mtx4->convert_to(coo_mtx); - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_FALSE(res->get_size()); + assert_coo_eq_mtx4(coo_mtx.get()); } -TYPED_TEST(DenseWithIndexType, ConvertsEmptyMatrixToCsr) +TYPED_TEST(DenseWithIndexType, MovesToCoo) { - using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using 
Csr = typename gko::matrix::Csr; - auto empty = Dense::create(this->exec); - auto res = Csr::create(this->exec); + using Coo = typename gko::matrix::Coo; + auto coo_mtx = Coo::create(this->mtx4->get_executor()); - empty->convert_to(res); + this->mtx4->move_to(coo_mtx); - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_EQ(*res->get_const_row_ptrs(), 0); - ASSERT_FALSE(res->get_size()); + assert_coo_eq_mtx4(coo_mtx.get()); } -TYPED_TEST(DenseWithIndexType, MovesEmptyMatrixToCsr) +template +void assert_csr_eq_mtx4(const gko::matrix::Csr* csr_mtx) { - using Dense = typename TestFixture::Mtx; - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Csr = typename gko::matrix::Csr; - auto empty = Dense::create(this->exec); - auto res = Csr::create(this->exec); - - empty->move_to(res); - - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_EQ(*res->get_const_row_ptrs(), 0); - ASSERT_FALSE(res->get_size()); + auto v = csr_mtx->get_const_values(); + auto c = csr_mtx->get_const_col_idxs(); + auto r = csr_mtx->get_const_row_ptrs(); + ASSERT_EQ(csr_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(csr_mtx->get_num_stored_elements(), 4); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(r[1], 3); + EXPECT_EQ(r[2], 4); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 2); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{3.0}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{5.0}); } -TYPED_TEST(DenseWithIndexType, ConvertsEmptyToSparsityCsr) +TYPED_TEST(DenseWithIndexType, ConvertsToCsr) { - using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using SparsityCsr = - typename gko::matrix::SparsityCsr; - auto empty = Dense::create(this->exec); - auto res = SparsityCsr::create(this->exec); + using Csr = typename gko::matrix::Csr; + auto csr_s_classical = std::make_shared(); + auto csr_s_merge = std::make_shared(); + auto csr_mtx_c = Csr::create(this->mtx4->get_executor(), csr_s_classical); + auto csr_mtx_m = Csr::create(this->mtx4->get_executor(), csr_s_merge); - empty->convert_to(res); + this->mtx4->convert_to(csr_mtx_c); + this->mtx4->convert_to(csr_mtx_m); - ASSERT_EQ(res->get_num_nonzeros(), 0); - ASSERT_EQ(*res->get_const_row_ptrs(), 0); - ASSERT_FALSE(res->get_size()); + assert_csr_eq_mtx4(csr_mtx_c.get()); + ASSERT_EQ(csr_mtx_c->get_strategy()->get_name(), "classical"); + GKO_ASSERT_MTX_NEAR(csr_mtx_c, csr_mtx_m, 0.0); + ASSERT_EQ(csr_mtx_m->get_strategy()->get_name(), "merge_path"); } -TYPED_TEST(DenseWithIndexType, MovesEmptyToSparsityCsr) +TYPED_TEST(DenseWithIndexType, MovesToCsr) { - using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using SparsityCsr = - typename gko::matrix::SparsityCsr; - auto empty = Dense::create(this->exec); - auto res = SparsityCsr::create(this->exec); + using Csr = typename gko::matrix::Csr; + auto csr_s_classical = std::make_shared(); + auto csr_s_merge = std::make_shared(); + auto csr_mtx_c = Csr::create(this->mtx4->get_executor(), csr_s_classical); + auto csr_mtx_m = Csr::create(this->mtx4->get_executor(), csr_s_merge); + auto mtx_clone = this->mtx4->clone(); - empty->move_to(res); + this->mtx4->move_to(csr_mtx_c); + mtx_clone->move_to(csr_mtx_m); - ASSERT_EQ(res->get_num_nonzeros(), 0); - ASSERT_EQ(*res->get_const_row_ptrs(), 0); - 
ASSERT_FALSE(res->get_size()); + assert_csr_eq_mtx4(csr_mtx_c.get()); + ASSERT_EQ(csr_mtx_c->get_strategy()->get_name(), "classical"); + GKO_ASSERT_MTX_NEAR(csr_mtx_c, csr_mtx_m, 0.0); + ASSERT_EQ(csr_mtx_m->get_strategy()->get_name(), "merge_path"); } -TYPED_TEST(DenseWithIndexType, ConvertsEmptyToEll) +template +void assert_sparsity_csr_eq_mtx4( + const gko::matrix::SparsityCsr* sparsity_csr_mtx) { - using Dense = typename TestFixture::Mtx; - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Ell = typename gko::matrix::Ell; - auto empty = Dense::create(this->exec); - auto res = Ell::create(this->exec); - - empty->convert_to(res); + auto v = sparsity_csr_mtx->get_const_value(); + auto c = sparsity_csr_mtx->get_const_col_idxs(); + auto r = sparsity_csr_mtx->get_const_row_ptrs(); - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_FALSE(res->get_size()); + ASSERT_EQ(sparsity_csr_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(sparsity_csr_mtx->get_num_nonzeros(), 4); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(r[1], 3); + EXPECT_EQ(r[2], 4); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 2); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(v[0], ValueType{1.0}); } -TYPED_TEST(DenseWithIndexType, MovesEmptyToEll) +TYPED_TEST(DenseWithIndexType, ConvertsToSparsityCsr) { - using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using Ell = typename gko::matrix::Ell; - auto empty = Dense::create(this->exec); - auto res = Ell::create(this->exec); + using SparsityCsr = + typename gko::matrix::SparsityCsr; + auto sparsity_csr_mtx = SparsityCsr::create(this->mtx4->get_executor()); - empty->move_to(res); + this->mtx4->convert_to(sparsity_csr_mtx); - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_FALSE(res->get_size()); + assert_sparsity_csr_eq_mtx4(sparsity_csr_mtx.get()); } -TYPED_TEST(DenseWithIndexType, ConvertsEmptyToHybrid) +TYPED_TEST(DenseWithIndexType, MovesToSparsityCsr) { - using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using Hybrid = typename gko::matrix::Hybrid; - auto empty = Dense::create(this->exec); - auto res = Hybrid::create(this->exec); + using SparsityCsr = + typename gko::matrix::SparsityCsr; + auto sparsity_csr_mtx = SparsityCsr::create(this->mtx4->get_executor()); - empty->convert_to(res); + this->mtx4->move_to(sparsity_csr_mtx); - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_FALSE(res->get_size()); + assert_sparsity_csr_eq_mtx4(sparsity_csr_mtx.get()); } -TYPED_TEST(DenseWithIndexType, MovesEmptyToHybrid) +template +void assert_ell_eq_mtx6(const gko::matrix::Ell* ell_mtx) { - using Dense = typename TestFixture::Mtx; - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - using Hybrid = typename gko::matrix::Hybrid; - auto empty = Dense::create(this->exec); - auto res = Hybrid::create(this->exec); - - empty->move_to(res); + auto v = ell_mtx->get_const_values(); + auto c = ell_mtx->get_const_col_idxs(); - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_FALSE(res->get_size()); + ASSERT_EQ(ell_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(ell_mtx->get_num_stored_elements_per_row(), 2); + ASSERT_EQ(ell_mtx->get_num_stored_elements(), 4); + ASSERT_EQ(ell_mtx->get_stride(), 2); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 1); + 
EXPECT_EQ(c[3], gko::invalid_index()); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{1.5}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{0.0}); } -TYPED_TEST(DenseWithIndexType, ConvertsEmptyToSellp) +TYPED_TEST(DenseWithIndexType, ConvertsToEll) { - using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using Sellp = typename gko::matrix::Sellp; - auto empty = Dense::create(this->exec); - auto res = Sellp::create(this->exec); + using Ell = typename gko::matrix::Ell; + auto ell_mtx = Ell::create(this->mtx6->get_executor()); - empty->convert_to(res); + this->mtx6->convert_to(ell_mtx); - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_EQ(*res->get_const_slice_sets(), 0); - ASSERT_FALSE(res->get_size()); + assert_ell_eq_mtx6(ell_mtx.get()); } -TYPED_TEST(DenseWithIndexType, MovesEmptyToSellp) +TYPED_TEST(DenseWithIndexType, MovesToEll) { - using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using Sellp = typename gko::matrix::Sellp; - auto empty = Dense::create(this->exec); - auto res = Sellp::create(this->exec); + using Ell = typename gko::matrix::Ell; + auto ell_mtx = Ell::create(this->mtx6->get_executor()); - empty->move_to(res); + this->mtx6->move_to(ell_mtx); - ASSERT_EQ(res->get_num_stored_elements(), 0); - ASSERT_EQ(*res->get_const_slice_sets(), 0); - ASSERT_FALSE(res->get_size()); + assert_ell_eq_mtx6(ell_mtx.get()); } -TYPED_TEST(Dense, SquareMatrixIsTransposable) +template +void assert_strided_ell_eq_mtx6( + const gko::matrix::Ell* ell_mtx) { - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto trans = gko::as(this->mtx5->transpose()); + constexpr auto invalid_index = gko::invalid_index(); + auto v = ell_mtx->get_const_values(); + auto c = ell_mtx->get_const_col_idxs(); - GKO_ASSERT_MTX_NEAR( - trans, l({{1.0, -2.0, 2.1}, {-1.0, 2.0, 3.4}, {-0.5, 4.5, 1.2}}), - 0.0); + ASSERT_EQ(ell_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(ell_mtx->get_num_stored_elements_per_row(), 2); + ASSERT_EQ(ell_mtx->get_num_stored_elements(), 6); + ASSERT_EQ(ell_mtx->get_stride(), 3); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], invalid_index); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(c[4], invalid_index); + EXPECT_EQ(c[5], invalid_index); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{1.5}); + EXPECT_EQ(v[2], ValueType{0.0}); + EXPECT_EQ(v[3], ValueType{2.0}); + EXPECT_EQ(v[4], ValueType{0.0}); + EXPECT_EQ(v[5], ValueType{0.0}); } -TYPED_TEST(Dense, SquareMatrixIsTransposableIntoDense) +TYPED_TEST(DenseWithIndexType, ConvertsToEllWithStride) { - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto trans = Mtx::create(this->exec, this->mtx5->get_size()); + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Ell = typename gko::matrix::Ell; + auto ell_mtx = + Ell::create(this->mtx6->get_executor(), gko::dim<2>{2, 3}, 2, 3); - this->mtx5->transpose(trans); + this->mtx6->convert_to(ell_mtx); - GKO_ASSERT_MTX_NEAR( - trans, l({{1.0, -2.0, 2.1}, {-1.0, 2.0, 3.4}, {-0.5, 4.5, 1.2}}), - 0.0); + assert_strided_ell_eq_mtx6(ell_mtx.get()); } -TYPED_TEST(Dense, SquareSubmatrixIsTransposableIntoDense) +TYPED_TEST(DenseWithIndexType, MovesToEllWithStride) { - using Mtx = typename TestFixture::Mtx; - using T = typename 
TestFixture::value_type; - auto trans = Mtx::create(this->exec, gko::dim<2>{2, 2}, 4); + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Ell = typename gko::matrix::Ell; + auto ell_mtx = + Ell::create(this->mtx6->get_executor(), gko::dim<2>{2, 3}, 2, 3); - this->mtx5->create_submatrix({0, 2}, {0, 2})->transpose(trans); + this->mtx6->move_to(ell_mtx); - GKO_ASSERT_MTX_NEAR(trans, l({{1.0, -2.0}, {-1.0, 2.0}}), 0.0); - ASSERT_EQ(trans->get_stride(), 4); + assert_strided_ell_eq_mtx6(ell_mtx.get()); } -TYPED_TEST(Dense, SquareMatrixIsTransposableIntoDenseFailsForWrongDimensions) +template +void assert_hybrid_auto_eq_mtx4( + const gko::matrix::Hybrid* hybrid_mtx) { - using Mtx = typename TestFixture::Mtx; + auto v = hybrid_mtx->get_const_coo_values(); + auto c = hybrid_mtx->get_const_coo_col_idxs(); + auto r = hybrid_mtx->get_const_coo_row_idxs(); + auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); + auto p = hybrid_mtx->get_ell_stride(); - ASSERT_THROW(this->mtx5->transpose(Mtx::create(this->exec)), - gko::DimensionMismatch); + ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 0); + ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 4); + EXPECT_EQ(n, 0); + EXPECT_EQ(p, 2); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(r[1], 0); + EXPECT_EQ(r[2], 0); + EXPECT_EQ(r[3], 1); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 2); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{3.0}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{5.0}); } -TYPED_TEST(Dense, NonSquareMatrixIsTransposable) +TYPED_TEST(DenseWithIndexType, MovesToHybridAutomatically) { - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto trans = gko::as(this->mtx4->transpose()); + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; + auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor()); - GKO_ASSERT_MTX_NEAR(trans, l({{1.0, 0.0}, {3.0, 5.0}, {2.0, 0.0}}), 0.0); + this->mtx4->move_to(hybrid_mtx); + + assert_hybrid_auto_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(Dense, NonSquareMatrixIsTransposableIntoDense) +TYPED_TEST(DenseWithIndexType, ConvertsToHybridAutomatically) { - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto trans = - Mtx::create(this->exec, gko::transpose(this->mtx4->get_size())); + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; + auto hybrid_mtx = Hybrid::create(this->mtx4->get_executor()); - this->mtx4->transpose(trans); + this->mtx4->convert_to(hybrid_mtx); - GKO_ASSERT_MTX_NEAR(trans, l({{1.0, 0.0}, {3.0, 5.0}, {2.0, 0.0}}), 0.0); + assert_hybrid_auto_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(Dense, NonSquareSubmatrixIsTransposableIntoDense) +template +void assert_hybrid_strided_eq_mtx4( + const gko::matrix::Hybrid* hybrid_mtx) { - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto trans = Mtx::create(this->exec, gko::dim<2>{2, 1}, 5); - - this->mtx4->create_submatrix({0, 1}, {0, 2})->transpose(trans); + auto v = hybrid_mtx->get_const_coo_values(); + auto c = hybrid_mtx->get_const_coo_col_idxs(); + auto r = hybrid_mtx->get_const_coo_row_idxs(); + auto n = 
hybrid_mtx->get_ell_num_stored_elements_per_row(); + auto p = hybrid_mtx->get_ell_stride(); - GKO_ASSERT_MTX_NEAR(trans, l({1.0, 3.0}), 0.0); - ASSERT_EQ(trans->get_stride(), 5); + ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 0); + ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 4); + EXPECT_EQ(n, 0); + EXPECT_EQ(p, 3); + EXPECT_EQ(r[0], 0); + EXPECT_EQ(r[1], 0); + EXPECT_EQ(r[2], 0); + EXPECT_EQ(r[3], 1); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 2); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{3.0}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{5.0}); } -TYPED_TEST(Dense, NonSquareMatrixIsTransposableIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, MovesToHybridWithStrideAutomatically) { - using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Hybrid = typename gko::matrix::Hybrid; + auto hybrid_mtx = + Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 0, 3); - ASSERT_THROW(this->mtx4->transpose(Mtx::create(this->exec)), - gko::DimensionMismatch); + this->mtx4->move_to(hybrid_mtx); + + assert_hybrid_strided_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(DenseWithIndexType, SquareMatrixCanGatherRows) +TYPED_TEST(DenseWithIndexType, ConvertsToHybridWithStrideAutomatically) { - using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; + using Hybrid = typename gko::matrix::Hybrid; + auto hybrid_mtx = + Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 0, 3); - auto row_collection = this->mtx5->row_gather(&permute_idxs); + this->mtx4->convert_to(hybrid_mtx); - GKO_ASSERT_MTX_NEAR(row_collection, - l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), - 0.0); + assert_hybrid_strided_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(DenseWithIndexType, SquareMatrixCanGatherRowsIntoDense) +template +void assert_hybrid_limited_eq_mtx4( + const gko::matrix::Hybrid* hybrid_mtx) +{ + constexpr auto invalid_index = gko::invalid_index(); + auto v = hybrid_mtx->get_const_ell_values(); + auto c = hybrid_mtx->get_const_ell_col_idxs(); + auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); + auto p = hybrid_mtx->get_ell_stride(); + + ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 6); + ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 1); + EXPECT_EQ(n, 2); + EXPECT_EQ(p, 3); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], invalid_index); + EXPECT_EQ(c[3], 1); + EXPECT_EQ(c[4], invalid_index); + EXPECT_EQ(c[5], invalid_index); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{5.0}); + EXPECT_EQ(v[2], ValueType{0.0}); + EXPECT_EQ(v[3], ValueType{3.0}); + EXPECT_EQ(v[4], ValueType{0.0}); + EXPECT_EQ(v[5], ValueType{0.0}); + EXPECT_EQ(hybrid_mtx->get_const_coo_values()[0], ValueType{2.0}); + EXPECT_EQ(hybrid_mtx->get_const_coo_row_idxs()[0], 0); + EXPECT_EQ(hybrid_mtx->get_const_coo_col_idxs()[0], 2); +} + + +TYPED_TEST(DenseWithIndexType, MovesToHybridWithStrideAndCooLengthByColumns2) { - using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array 
permute_idxs{exec, {1, 0}}; - auto row_collection = Mtx::create(exec, gko::dim<2>{2, 3}); + using Hybrid = typename gko::matrix::Hybrid; + auto hybrid_mtx = + Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 2, 3, 3, + std::make_shared(2)); - this->mtx5->row_gather(&permute_idxs, row_collection); + this->mtx4->move_to(hybrid_mtx); - GKO_ASSERT_MTX_NEAR(row_collection, - l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), - 0.0); + assert_hybrid_limited_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(DenseWithIndexType, SquareSubmatrixCanGatherRowsIntoDense) +TYPED_TEST(DenseWithIndexType, ConvertsToHybridWithStrideAndCooLengthByColumns2) { - using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto row_collection = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + using Hybrid = typename gko::matrix::Hybrid; + auto hybrid_mtx = + Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 2, 3, 3, + std::make_shared(2)); - this->mtx5->create_submatrix({0, 2}, {1, 3}) - ->row_gather(&permute_idxs, row_collection); + this->mtx4->convert_to(hybrid_mtx); - GKO_ASSERT_MTX_NEAR(row_collection, - l({{2.0, 4.5}, {-1.0, -0.5}}), 0.0); - ASSERT_EQ(row_collection->get_stride(), 4); + assert_hybrid_limited_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(DenseWithIndexType, NonSquareSubmatrixCanGatherRowsIntoMixedDense) +template +void assert_hybrid_percent_eq_mtx4( + const gko::matrix::Hybrid* hybrid_mtx) { - using Mtx = typename TestFixture::Mtx; - using MixedMtx = typename TestFixture::MixedMtx; - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - auto exec = this->mtx4->get_executor(); - gko::array gather_index{exec, {1, 0, 1}}; - auto row_collection = MixedMtx::create(exec, gko::dim<2>{3, 3}, 4); - - this->mtx4->row_gather(&gather_index, row_collection); + auto v = hybrid_mtx->get_const_ell_values(); + auto c = hybrid_mtx->get_const_ell_col_idxs(); + auto n = hybrid_mtx->get_ell_num_stored_elements_per_row(); + auto p = hybrid_mtx->get_ell_stride(); + auto coo_v = hybrid_mtx->get_const_coo_values(); + auto coo_c = hybrid_mtx->get_const_coo_col_idxs(); + auto coo_r = hybrid_mtx->get_const_coo_row_idxs(); - GKO_ASSERT_MTX_NEAR( - row_collection, - l( - {{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}, {0.0, 5.0, 0.0}}), - 0.0); + ASSERT_EQ(hybrid_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(hybrid_mtx->get_ell_num_stored_elements(), 3); + EXPECT_EQ(n, 1); + EXPECT_EQ(p, 3); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], gko::invalid_index()); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{5.0}); + EXPECT_EQ(v[2], ValueType{0.0}); + ASSERT_EQ(hybrid_mtx->get_coo_num_stored_elements(), 2); + EXPECT_EQ(coo_v[0], ValueType{3.0}); + EXPECT_EQ(coo_v[1], ValueType{2.0}); + EXPECT_EQ(coo_c[0], 1); + EXPECT_EQ(coo_c[1], 2); + EXPECT_EQ(coo_r[0], 0); + EXPECT_EQ(coo_r[1], 0); } -TYPED_TEST(DenseWithIndexType, - NonSquareSubmatrixCanAdvancedGatherRowsIntoMixedDense) +TYPED_TEST(DenseWithIndexType, MovesToHybridWithStrideByPercent40) { - using Mtx = typename TestFixture::Mtx; - using MixedMtx = typename TestFixture::MixedMtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx4->get_executor(); - gko::array gather_index{exec, {1, 0, 1}}; - auto row_collection = gko::initialize( - {{1.0, 0.5, -1.0}, 
{-1.5, 0.5, 1.0}, {2.0, -3.0, 1.0}}, exec); - auto alpha = gko::initialize({1.0}, exec); - auto beta = gko::initialize({2.0}, exec); + using Hybrid = typename gko::matrix::Hybrid; + auto hybrid_mtx = + Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 1, 3, + std::make_shared(0.4)); - this->mtx4->row_gather(alpha, &gather_index, beta, row_collection); + this->mtx4->move_to(hybrid_mtx); - GKO_ASSERT_MTX_NEAR( - row_collection, - l( - {{2.0, 6.0, -2.0}, {-2.0, 4.0, 4.0}, {4.0, -1.0, 2.0}}), - 0.0); + assert_hybrid_percent_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(DenseWithIndexType, - SquareMatrixGatherRowsIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, ConvertsToHybridWithStrideByPercent40) { - using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; + using Hybrid = typename gko::matrix::Hybrid; + auto hybrid_mtx = + Hybrid::create(this->mtx4->get_executor(), gko::dim<2>{2, 3}, 1, 3, + std::make_shared(0.4)); - ASSERT_THROW(this->mtx5->row_gather(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); + this->mtx4->convert_to(hybrid_mtx); + + assert_hybrid_percent_eq_mtx4(hybrid_mtx.get()); } -TYPED_TEST(DenseWithIndexType, SquareMatrixIsPermutable) +template +void assert_sellp_eq_mtx7( + const gko::matrix::Sellp* sellp_mtx) { - using Mtx = typename TestFixture::Mtx; - using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - - auto ref_permuted = - gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) - ->column_permute(&permute_idxs)); - auto permuted = gko::as(this->mtx5->permute(&permute_idxs)); + constexpr auto invalid_index = gko::invalid_index(); + auto v = sellp_mtx->get_const_values(); + auto c = sellp_mtx->get_const_col_idxs(); + auto s = sellp_mtx->get_const_slice_sets(); + auto l = sellp_mtx->get_const_slice_lengths(); - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + ASSERT_EQ(sellp_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(sellp_mtx->get_total_cols(), 3); + ASSERT_EQ(sellp_mtx->get_num_stored_elements(), + 3 * gko::matrix::default_slice_size); + ASSERT_EQ(sellp_mtx->get_slice_size(), gko::matrix::default_slice_size); + ASSERT_EQ(sellp_mtx->get_stride_factor(), + gko::matrix::default_stride_factor); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[gko::matrix::default_slice_size], 1); + EXPECT_EQ(c[gko::matrix::default_slice_size + 1], invalid_index); + EXPECT_EQ(c[2 * gko::matrix::default_slice_size], 2); + EXPECT_EQ(c[2 * gko::matrix::default_slice_size + 1], invalid_index); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{1.5}); + EXPECT_EQ(v[gko::matrix::default_slice_size], ValueType{2.0}); + EXPECT_EQ(v[gko::matrix::default_slice_size + 1], ValueType{0.0}); + EXPECT_EQ(v[2 * gko::matrix::default_slice_size], ValueType{3.0}); + EXPECT_EQ(v[2 * gko::matrix::default_slice_size + 1], ValueType{0.0}); + EXPECT_EQ(s[0], 0); + EXPECT_EQ(s[1], 3); + EXPECT_EQ(l[0], 3); } -TYPED_TEST(DenseWithIndexType, SquareMatrixIsPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, ConvertsToSellp) { - using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - auto permuted = Mtx::create(exec, 
this->mtx5->get_size()); + using Sellp = typename gko::matrix::Sellp; + auto sellp_mtx = Sellp::create(this->mtx7->get_executor()); - auto ref_permuted = - gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) - ->column_permute(&permute_idxs)); - this->mtx5->permute(&permute_idxs, permuted); + this->mtx7->convert_to(sellp_mtx); - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + assert_sellp_eq_mtx7(sellp_mtx.get()); } -TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, MovesToSellp) { - using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); + using Sellp = typename gko::matrix::Sellp; + auto sellp_mtx = Sellp::create(this->mtx7->get_executor()); - auto ref_permuted = - gko::as(gko::as(mtx->row_permute(&permute_idxs)) - ->column_permute(&permute_idxs)); - mtx->permute(&permute_idxs, permuted); + this->mtx7->move_to(sellp_mtx); - GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); - ASSERT_EQ(permuted->get_stride(), 4); + assert_sellp_eq_mtx7(sellp_mtx.get()); } -TYPED_TEST(DenseWithIndexType, NonSquareMatrixPermuteIntoDenseFails) +template +void assert_sellp_strided_eq_mtx7( + const gko::matrix::Sellp* sellp_mtx) { - using Mtx = typename TestFixture::Mtx; - using index_type = typename TestFixture::index_type; - auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + constexpr auto invalid_index = gko::invalid_index(); + auto v = sellp_mtx->get_const_values(); + auto c = sellp_mtx->get_const_col_idxs(); + auto s = sellp_mtx->get_const_slice_sets(); + auto l = sellp_mtx->get_const_slice_lengths(); - ASSERT_THROW(this->mtx4->permute(&permute_idxs, this->mtx4->clone()), - gko::DimensionMismatch); + ASSERT_EQ(sellp_mtx->get_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(sellp_mtx->get_total_cols(), 4); + ASSERT_EQ(sellp_mtx->get_num_stored_elements(), 8); + ASSERT_EQ(sellp_mtx->get_slice_size(), 2); + ASSERT_EQ(sellp_mtx->get_stride_factor(), 2); + EXPECT_EQ(c[0], 0); + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[2], 1); + EXPECT_EQ(c[3], invalid_index); + EXPECT_EQ(c[4], 2); + EXPECT_EQ(c[5], invalid_index); + EXPECT_EQ(c[6], invalid_index); + EXPECT_EQ(c[7], invalid_index); + EXPECT_EQ(v[0], ValueType{1.0}); + EXPECT_EQ(v[1], ValueType{1.5}); + EXPECT_EQ(v[2], ValueType{2.0}); + EXPECT_EQ(v[3], ValueType{0.0}); + EXPECT_EQ(v[4], ValueType{3.0}); + EXPECT_EQ(v[5], ValueType{0.0}); + EXPECT_EQ(v[6], ValueType{0.0}); + EXPECT_EQ(v[7], ValueType{0.0}); + EXPECT_EQ(s[0], 0); + EXPECT_EQ(s[1], 4); + EXPECT_EQ(l[0], 4); } -TYPED_TEST(DenseWithIndexType, - SquareMatrixPermuteIntoDenseFailsForWrongPermutationSize) +TYPED_TEST(DenseWithIndexType, ConvertsToSellpWithSliceSizeAndStrideFactor) { - using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; + using Sellp = typename gko::matrix::Sellp; + auto sellp_mtx = + Sellp::create(this->mtx7->get_executor(), gko::dim<2>{}, 2, 2, 0); - ASSERT_THROW(this->mtx5->permute(&permute_idxs, this->mtx5->clone()), - gko::ValueMismatch); + this->mtx7->convert_to(sellp_mtx); + + assert_sellp_strided_eq_mtx7(sellp_mtx.get()); } 
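The Sellp conversion tests above rely on the generic convert_to/move_to mechanism of Dense. A minimal sketch of the same conversion outside the fixture, assuming double values and 32-bit indices (illustrative only, not part of this patch):

    // Illustrative sketch (not part of the patch): Dense -> Sellp conversion.
    #include <ginkgo/ginkgo.hpp>

    auto exec = gko::ReferenceExecutor::create();
    auto dense = gko::initialize<gko::matrix::Dense<double>>(
        {{1.0, 0.0, 3.0}, {0.0, 2.0, 0.0}}, exec);
    auto sellp = gko::matrix::Sellp<double, gko::int32>::create(exec);
    dense->convert_to(sellp);  // copy conversion; dense keeps its values
    // dense->move_to(sellp);  // alternative that may take over dense's data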
-TYPED_TEST(DenseWithIndexType,
-           SquareMatrixPermuteIntoDenseFailsForWrongDimensions)
+TYPED_TEST(DenseWithIndexType, MovesToSellpWithSliceSizeAndStrideFactor)
 {
-    using Mtx = typename TestFixture::Mtx;
+    using value_type = typename TestFixture::value_type;
     using index_type = typename TestFixture::index_type;
-    auto exec = this->mtx5->get_executor();
-    gko::array<index_type> permute_idxs{exec, {1, 2, 0}};
+    using Sellp = typename gko::matrix::Sellp<value_type, index_type>;
+    auto sellp_mtx =
+        Sellp::create(this->mtx7->get_executor(), gko::dim<2>{}, 2, 2, 0);
 
-    ASSERT_THROW(this->mtx5->permute(&permute_idxs, Mtx::create(exec)),
-                 gko::DimensionMismatch);
+    this->mtx7->move_to(sellp_mtx);
+
+    assert_sellp_strided_eq_mtx7(sellp_mtx.get());
 }
 
 
-TYPED_TEST(DenseWithIndexType, SquareMatrixIsInversePermutable)
+TYPED_TEST(DenseWithIndexType, ConvertsToAndFromSellpWithMoreThanOneSlice)
 {
-    using Mtx = typename TestFixture::Mtx;
+    using value_type = typename TestFixture::value_type;
     using index_type = typename TestFixture::index_type;
-    auto exec = this->mtx5->get_executor();
-    gko::array<index_type> permute_idxs{exec, {1, 2, 0}};
+    using Mtx = typename TestFixture::Mtx;
+    using Sellp = typename gko::matrix::Sellp<value_type, index_type>;
+    auto x = this->template gen_mtx<Mtx>(65, 25);
 
-    auto ref_permuted = gko::as<Mtx>(
-        gko::as<Mtx>(this->mtx5->inverse_row_permute(&permute_idxs))
-            ->inverse_column_permute(&permute_idxs));
-    auto permuted = gko::as<Mtx>(this->mtx5->inverse_permute(&permute_idxs));
+    auto sellp_mtx = Sellp::create(this->exec);
+    auto dense_mtx = Mtx::create(this->exec);
+    x->convert_to(sellp_mtx);
+    sellp_mtx->convert_to(dense_mtx);
 
-    GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0);
+    GKO_ASSERT_MTX_NEAR(dense_mtx, x, 0.0);
 }
 
 
-TYPED_TEST(DenseWithIndexType, SquareMatrixIsInversePermutableIntoDense)
+TYPED_TEST(Dense, ConvertsEmptyToPrecision)
 {
-    using Mtx = typename TestFixture::Mtx;
-    using index_type = typename TestFixture::index_type;
-    auto exec = this->mtx5->get_executor();
-    gko::array<index_type> permute_idxs{exec, {1, 2, 0}};
-    auto permuted = Mtx::create(exec, this->mtx5->get_size());
+    using Dense = typename TestFixture::Mtx;
+    using T = typename TestFixture::value_type;
+    using OtherT = typename gko::next_precision<T>;
+    using OtherDense = typename gko::matrix::Dense<OtherT>;
+    auto empty = OtherDense::create(this->exec);
+    auto res = Dense::create(this->exec);
 
-    auto ref_permuted = gko::as<Mtx>(
-        gko::as<Mtx>(this->mtx5->inverse_row_permute(&permute_idxs))
-            ->inverse_column_permute(&permute_idxs));
-    this->mtx5->inverse_permute(&permute_idxs, permuted);
+    empty->convert_to(res);
 
-    GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0);
+    ASSERT_FALSE(res->get_size());
 }
 
 
-TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInversePermutableIntoDense)
+TYPED_TEST(Dense, MovesEmptyToPrecision)
 {
-    using Mtx = typename TestFixture::Mtx;
-    using index_type = typename TestFixture::index_type;
-    auto exec = this->mtx5->get_executor();
-    gko::array<index_type> permute_idxs{exec, {1, 0}};
-    auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4);
-    auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3});
+    using Dense = typename TestFixture::Mtx;
+    using T = typename TestFixture::value_type;
+    using OtherT = typename gko::next_precision<T>;
+    using OtherDense = typename gko::matrix::Dense<OtherT>;
+    auto empty = OtherDense::create(this->exec);
+    auto res = Dense::create(this->exec);
 
-    auto ref_permuted =
-        gko::as<Mtx>(gko::as<Mtx>(mtx->inverse_row_permute(&permute_idxs))
-                         ->inverse_column_permute(&permute_idxs));
-    mtx->inverse_permute(&permute_idxs, permuted);
+    empty->move_to(res);
 
-    GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0);
- ASSERT_EQ(permuted->get_stride(), 4); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, NonSquareMatrixInversePermuteIntoDenseFails) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyToCoo) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + using Coo = typename gko::matrix::Coo; + auto empty = Dense::create(this->exec); + auto res = Coo::create(this->exec); - ASSERT_THROW( - this->mtx4->inverse_permute(&permute_idxs, this->mtx4->clone()), - gko::DimensionMismatch); + empty->convert_to(res); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, - SquareMatrixInversePermuteIntoDenseFailsForWrongPermutationSize) +TYPED_TEST(DenseWithIndexType, MovesEmptyToCoo) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {0, 1}}; + using Coo = typename gko::matrix::Coo; + auto empty = Dense::create(this->exec); + auto res = Coo::create(this->exec); - ASSERT_THROW( - this->mtx5->inverse_permute(&permute_idxs, this->mtx5->clone()), - gko::ValueMismatch); + empty->move_to(res); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, - SquareMatrixInversePermuteIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyMatrixToCsr) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + using Csr = typename gko::matrix::Csr; + auto empty = Dense::create(this->exec); + auto res = Csr::create(this->exec); - ASSERT_THROW(this->mtx5->inverse_permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); + empty->convert_to(res); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, SquareMatrixIsRowPermutable) +TYPED_TEST(DenseWithIndexType, MovesEmptyMatrixToCsr) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + using Csr = typename gko::matrix::Csr; + auto empty = Dense::create(this->exec); + auto res = Csr::create(this->exec); - auto row_permute = gko::as(this->mtx5->row_permute(&permute_idxs)); + empty->move_to(res); - GKO_ASSERT_MTX_NEAR( - row_permute, - l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), - 0.0); + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsRowPermutable) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyToSparsityCsr) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename 
TestFixture::index_type; - auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; + using SparsityCsr = + typename gko::matrix::SparsityCsr; + auto empty = Dense::create(this->exec); + auto res = SparsityCsr::create(this->exec); - auto row_permute = gko::as(this->mtx4->row_permute(&permute_idxs)); + empty->convert_to(res); - GKO_ASSERT_MTX_NEAR(row_permute, - l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); + ASSERT_EQ(res->get_num_nonzeros(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, SquareMatrixIsRowPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, MovesEmptyToSparsityCsr) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + using SparsityCsr = + typename gko::matrix::SparsityCsr; + auto empty = Dense::create(this->exec); + auto res = SparsityCsr::create(this->exec); - this->mtx5->row_permute(&permute_idxs, row_permute); + empty->move_to(res); - GKO_ASSERT_MTX_NEAR( - row_permute, - l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), - 0.0); + ASSERT_EQ(res->get_num_nonzeros(), 0); + ASSERT_EQ(*res->get_const_row_ptrs(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsRowPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyToEll) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + using Ell = typename gko::matrix::Ell; + auto empty = Dense::create(this->exec); + auto res = Ell::create(this->exec); - this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->row_permute(&permute_idxs, row_permute); + empty->convert_to(res); - GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), - 0.0); - ASSERT_EQ(row_permute->get_stride(), 4); + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, - SquareMatrixRowPermuteIntoDenseFailsForWrongPermutationSize) +TYPED_TEST(DenseWithIndexType, MovesEmptyToEll) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + using Ell = typename gko::matrix::Ell; + auto empty = Dense::create(this->exec); + auto res = Ell::create(this->exec); - ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, row_permute), - gko::ValueMismatch); + empty->move_to(res); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, - SquareMatrixRowPermuteIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyToHybrid) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = 
this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + using Hybrid = typename gko::matrix::Hybrid; + auto empty = Dense::create(this->exec); + auto res = Hybrid::create(this->exec); - ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); + empty->convert_to(res); + + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, SquareMatrixIsColPermutable) +TYPED_TEST(DenseWithIndexType, MovesEmptyToHybrid) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + using Hybrid = typename gko::matrix::Hybrid; + auto empty = Dense::create(this->exec); + auto res = Hybrid::create(this->exec); - auto c_permute = gko::as(this->mtx5->column_permute(&permute_idxs)); + empty->move_to(res); - GKO_ASSERT_MTX_NEAR( - c_permute, - l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), - 0.0); + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsColPermutable) +TYPED_TEST(DenseWithIndexType, ConvertsEmptyToSellp) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx4->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + using Sellp = typename gko::matrix::Sellp; + auto empty = Dense::create(this->exec); + auto res = Sellp::create(this->exec); - auto c_permute = gko::as(this->mtx4->column_permute(&permute_idxs)); + empty->convert_to(res); - GKO_ASSERT_MTX_NEAR(c_permute, - l({{3.0, 2.0, 1.0}, {5.0, 0.0, 0.0}}), 0.0); + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_EQ(*res->get_const_slice_sets(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, SquareMatrixIsColPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, MovesEmptyToSellp) { - using Mtx = typename TestFixture::Mtx; + using Dense = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - auto c_permute = Mtx::create(exec, this->mtx5->get_size()); + using Sellp = typename gko::matrix::Sellp; + auto empty = Dense::create(this->exec); + auto res = Sellp::create(this->exec); - this->mtx5->column_permute(&permute_idxs, c_permute); + empty->move_to(res); - GKO_ASSERT_MTX_NEAR( - c_permute, - l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), - 0.0); + ASSERT_EQ(res->get_num_stored_elements(), 0); + ASSERT_EQ(*res->get_const_slice_sets(), 0); + ASSERT_FALSE(res->get_size()); } -TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsColPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, SquareMatrixCanGatherRows) { using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 0}}; - auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->column_permute(&permute_idxs, c_permute); + auto row_collection = this->mtx5->row_gather(&permute_idxs); - GKO_ASSERT_MTX_NEAR(c_permute, 
l({{-1.0, 1.0}, {2.0, -2.0}}), + GKO_ASSERT_MTX_NEAR(row_collection, + l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), 0.0); - ASSERT_EQ(c_permute->get_stride(), 4); } -TYPED_TEST(DenseWithIndexType, - SquareMatrixColPermuteIntoDenseFailsForWrongPermutationSize) +TYPED_TEST(DenseWithIndexType, SquareMatrixCanGatherRowsIntoDense) { using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); - - ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, row_permute), - gko::ValueMismatch); -} - + gko::array permute_idxs{exec, {1, 0}}; + auto row_collection = Mtx::create(exec, gko::dim<2>{2, 3}); -TYPED_TEST(DenseWithIndexType, - SquareMatrixColPermuteIntoDenseFailsForWrongDimensions) -{ - using Mtx = typename TestFixture::Mtx; - using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; + this->mtx5->row_gather(&permute_idxs, row_collection); - ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); + GKO_ASSERT_MTX_NEAR(row_collection, + l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), + 0.0); } -TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseRowPermutable) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixCanGatherRowsIntoDense) { using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 0}}; + auto row_collection = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - auto inv_row_permute = - gko::as(this->mtx5->inverse_row_permute(&inverse_permute_idxs)); + this->mtx5->create_submatrix({0, 2}, {1, 3}) + ->row_gather(&permute_idxs, row_collection); - GKO_ASSERT_MTX_NEAR( - inv_row_permute, - l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), - 0.0); + GKO_ASSERT_MTX_NEAR(row_collection, + l({{2.0, 4.5}, {-1.0, -0.5}}), 0.0); + ASSERT_EQ(row_collection->get_stride(), 4); } -TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsInverseRowPermutable) +TYPED_TEST(DenseWithIndexType, NonSquareSubmatrixCanGatherRowsIntoMixedDense) { using Mtx = typename TestFixture::Mtx; + using MixedMtx = typename TestFixture::MixedMtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; auto exec = this->mtx4->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 0}}; + gko::array gather_index{exec, {1, 0, 1}}; + auto row_collection = MixedMtx::create(exec, gko::dim<2>{3, 3}, 4); - auto inverse_row_permute = - gko::as(this->mtx4->inverse_row_permute(&inverse_permute_idxs)); + this->mtx4->row_gather(&gather_index, row_collection); - GKO_ASSERT_MTX_NEAR(inverse_row_permute, - l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); + GKO_ASSERT_MTX_NEAR( + row_collection, + l( + {{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}, {0.0, 5.0, 0.0}}), + 0.0); } -TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseRowPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, + NonSquareSubmatrixCanAdvancedGatherRowsIntoMixedDense) { using Mtx = typename TestFixture::Mtx; + using MixedMtx = typename TestFixture::MixedMtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - auto exec = 
this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2, 0}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + auto exec = this->mtx4->get_executor(); + gko::array gather_index{exec, {1, 0, 1}}; + auto row_collection = gko::initialize( + {{1.0, 0.5, -1.0}, {-1.5, 0.5, 1.0}, {2.0, -3.0, 1.0}}, exec); + auto alpha = gko::initialize({1.0}, exec); + auto beta = gko::initialize({2.0}, exec); - this->mtx5->inverse_row_permute(&permute_idxs, row_permute); + this->mtx4->row_gather(alpha, &gather_index, beta, row_collection); GKO_ASSERT_MTX_NEAR( - row_permute, - l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), + row_collection, + l( + {{2.0, 6.0, -2.0}, {-2.0, 4.0, 4.0}, {4.0, -1.0, 2.0}}), 0.0); } -TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInverseRowPermutableIntoDense) +TYPED_TEST(DenseWithIndexType, + SquareMatrixGatherRowsIntoDenseFailsForWrongDimensions) { using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 0}}; - auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - - this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->inverse_row_permute(&permute_idxs, row_permute); - GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), - 0.0); - ASSERT_EQ(row_permute->get_stride(), 4); + ASSERT_THROW(this->mtx5->row_gather(&permute_idxs, Mtx::create(exec)), + gko::DimensionMismatch); } -TYPED_TEST(DenseWithIndexType, - SquareMatrixInverseRowPermuteIntoDenseFailsForWrongPermutationSize) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsPermutable) { using Mtx = typename TestFixture::Mtx; using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + gko::array permute_idxs{exec, {1, 2, 0}}; - ASSERT_THROW(this->mtx5->inverse_row_permute(&permute_idxs, row_permute), - gko::ValueMismatch); + auto ref_permuted = + gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(DenseWithIndexType, - SquareMatrixInverseRowPermuteIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2, 0}}; + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - ASSERT_THROW( - this->mtx5->inverse_row_permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); + auto ref_permuted = + gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + this->mtx5->permute(&permute_idxs, permuted); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseColPermutable) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 2, 0}}; + gko::array permute_idxs{exec, {1, 0}}; + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto mtx = this->mtx5->create_submatrix({0, 2}, 
{1, 3}); - auto inv_c_permute = - gko::as(this->mtx5->inverse_column_permute(&inverse_permute_idxs)); + auto ref_permuted = + gko::as(gko::as(mtx->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + mtx->permute(&permute_idxs, permuted); - GKO_ASSERT_MTX_NEAR( - inv_c_permute, - l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), - 0.0); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + ASSERT_EQ(permuted->get_stride(), 4); } -TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsInverseColPermutable) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixPermuteIntoDenseFails) { using Mtx = typename TestFixture::Mtx; - using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; auto exec = this->mtx4->get_executor(); - gko::array inverse_permute_idxs{exec, {1, 2, 0}}; - - auto inverse_c_permute = - gko::as(this->mtx4->inverse_column_permute(&inverse_permute_idxs)); - - GKO_ASSERT_MTX_NEAR(inverse_c_permute, - l({{2.0, 1.0, 3.0}, {0.0, 0.0, 5.0}}), 0.0); -} - - -TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseColPermutableIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2, 0}}; - auto c_permute = Mtx::create(exec, this->mtx5->get_size()); - - this->mtx5->inverse_column_permute(&permute_idxs, c_permute); - - GKO_ASSERT_MTX_NEAR( - c_permute, - l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), - 0.0); -} - - -TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInverseColPermutableIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - using value_type = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - gko::array permute_idxs{exec, {1, 0}}; - auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - - this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->column_permute(&permute_idxs, c_permute); - GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), - 0.0); - ASSERT_EQ(c_permute->get_stride(), 4); + ASSERT_THROW(this->mtx4->permute(&permute_idxs, this->mtx4->clone()), + gko::DimensionMismatch); } TYPED_TEST(DenseWithIndexType, - SquareMatrixInverseColPermuteIntoDenseFailsForWrongPermutationSize) + SquareMatrixPermuteIntoDenseFailsForWrongPermutationSize) { using Mtx = typename TestFixture::Mtx; using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); - ASSERT_THROW(this->mtx5->inverse_column_permute(&permute_idxs, row_permute), + ASSERT_THROW(this->mtx5->permute(&permute_idxs, this->mtx5->clone()), gko::ValueMismatch); } TYPED_TEST(DenseWithIndexType, - SquareMatrixInverseColPermuteIntoDenseFailsForWrongDimensions) + SquareMatrixPermuteIntoDenseFailsForWrongDimensions) { using Mtx = typename TestFixture::Mtx; using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2, 0}}; - ASSERT_THROW( - this->mtx5->inverse_column_permute(&permute_idxs, Mtx::create(exec)), - gko::DimensionMismatch); -} - - -TYPED_TEST(Dense, ExtractsDiagonalFromSquareMatrix) -{ - using T = typename TestFixture::value_type; - - auto diag = this->mtx5->extract_diagonal(); - - ASSERT_EQ(diag->get_size()[0], 3); - ASSERT_EQ(diag->get_size()[1], 3); - 
ASSERT_EQ(diag->get_values()[0], T{1.}); - ASSERT_EQ(diag->get_values()[1], T{2.}); - ASSERT_EQ(diag->get_values()[2], T{1.2}); + ASSERT_THROW(this->mtx5->permute(&permute_idxs, Mtx::create(exec)), + gko::DimensionMismatch); } -TYPED_TEST(Dense, ExtractsDiagonalFromTallSkinnyMatrix) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInversePermutable) { - using T = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; - auto diag = this->mtx4->extract_diagonal(); + auto ref_permuted = gko::as( + gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->inverse_permute(&permute_idxs)); - ASSERT_EQ(diag->get_size()[0], 2); - ASSERT_EQ(diag->get_size()[1], 2); - ASSERT_EQ(diag->get_values()[0], T{1.}); - ASSERT_EQ(diag->get_values()[1], T{5.}); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, ExtractsDiagonalFromShortFatMatrix) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInversePermutableIntoDense) { - using T = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - auto diag = this->mtx8->extract_diagonal(); + auto ref_permuted = gko::as( + gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + this->mtx5->inverse_permute(&permute_idxs, permuted); - ASSERT_EQ(diag->get_size()[0], 2); - ASSERT_EQ(diag->get_size()[1], 2); - ASSERT_EQ(diag->get_values()[0], T{1.}); - ASSERT_EQ(diag->get_values()[1], T{2.}); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, ExtractsDiagonalFromSquareMatrixIntoDiagonal) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInversePermutableIntoDense) { - using T = typename TestFixture::value_type; - auto diag = gko::matrix::Diagonal::create(this->exec, 3); + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 0}}; + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); - this->mtx5->extract_diagonal(diag); + auto ref_permuted = + gko::as(gko::as(mtx->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + mtx->inverse_permute(&permute_idxs, permuted); - ASSERT_EQ(diag->get_size()[0], 3); - ASSERT_EQ(diag->get_size()[1], 3); - ASSERT_EQ(diag->get_values()[0], T{1.}); - ASSERT_EQ(diag->get_values()[1], T{2.}); - ASSERT_EQ(diag->get_values()[2], T{1.2}); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + ASSERT_EQ(permuted->get_stride(), 4); } -TYPED_TEST(Dense, ExtractsDiagonalFromTallSkinnyMatrixIntoDiagonal) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixInversePermuteIntoDenseFails) { - using T = typename TestFixture::value_type; - auto diag = gko::matrix::Diagonal::create(this->exec, 2); - - this->mtx4->extract_diagonal(diag); + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx4->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; - ASSERT_EQ(diag->get_size()[0], 2); - ASSERT_EQ(diag->get_size()[1], 2); - ASSERT_EQ(diag->get_values()[0], 
T{1.}); - ASSERT_EQ(diag->get_values()[1], T{5.}); + ASSERT_THROW( + this->mtx4->inverse_permute(&permute_idxs, this->mtx4->clone()), + gko::DimensionMismatch); } -TYPED_TEST(Dense, ExtractsDiagonalFromShortFatMatrixIntoDiagonal) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInversePermuteIntoDenseFailsForWrongPermutationSize) { - using T = typename TestFixture::value_type; - auto diag = gko::matrix::Diagonal::create(this->exec, 2); - - this->mtx8->extract_diagonal(diag); + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {0, 1}}; - ASSERT_EQ(diag->get_size()[0], 2); - ASSERT_EQ(diag->get_size()[1], 2); - ASSERT_EQ(diag->get_values()[0], T{1.}); - ASSERT_EQ(diag->get_values()[1], T{2.}); + ASSERT_THROW( + this->mtx5->inverse_permute(&permute_idxs, this->mtx5->clone()), + gko::ValueMismatch); } -TYPED_TEST(Dense, InplaceAbsolute) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInversePermuteIntoDenseFailsForWrongDimensions) { - using T = typename TestFixture::value_type; - - this->mtx5->compute_absolute_inplace(); + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; - GKO_ASSERT_MTX_NEAR( - this->mtx5, l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), - 0.0); + ASSERT_THROW(this->mtx5->inverse_permute(&permute_idxs, Mtx::create(exec)), + gko::DimensionMismatch); } -TYPED_TEST(Dense, InplaceAbsoluteSubMatrix) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsRowPermutable) { - using T = typename TestFixture::value_type; - auto mtx = this->mtx5->create_submatrix(gko::span{0, 2}, gko::span{0, 2}); + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; - mtx->compute_absolute_inplace(); + auto row_permute = gko::as(this->mtx5->row_permute(&permute_idxs)); GKO_ASSERT_MTX_NEAR( - this->mtx5, l({{1.0, 1.0, -0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), + row_permute, + l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); } -TYPED_TEST(Dense, OutplaceAbsolute) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsRowPermutable) { - using T = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx4->get_executor(); + gko::array permute_idxs{exec, {1, 0}}; - auto abs_mtx = this->mtx5->compute_absolute(); + auto row_permute = gko::as(this->mtx4->row_permute(&permute_idxs)); - GKO_ASSERT_MTX_NEAR( - abs_mtx, l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), - 0.0); + GKO_ASSERT_MTX_NEAR(row_permute, + l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); } -TYPED_TEST(Dense, OutplaceAbsoluteIntoDense) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsRowPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto abs_mtx = - gko::remove_complex::create(this->exec, this->mtx5->get_size()); + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); - 
this->mtx5->compute_absolute(abs_mtx); + this->mtx5->row_permute(&permute_idxs, row_permute); GKO_ASSERT_MTX_NEAR( - abs_mtx, l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), + row_permute, + l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); } -TYPED_TEST(Dense, OutplaceAbsoluteSubMatrix) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsRowPermutableIntoDense) { - using T = typename TestFixture::value_type; - auto mtx = this->mtx5->create_submatrix(gko::span{0, 2}, gko::span{0, 2}); + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 0}}; + auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - auto abs_mtx = mtx->compute_absolute(); + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->row_permute(&permute_idxs, row_permute); - GKO_ASSERT_MTX_NEAR(abs_mtx, l({{1.0, 1.0}, {2.0, 2.0}}), 0); - GKO_ASSERT_EQ(abs_mtx->get_stride(), 2); + GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), + 0.0); + ASSERT_EQ(row_permute->get_stride(), 4); } -TYPED_TEST(Dense, OutplaceSubmatrixAbsoluteIntoDense) +TYPED_TEST(DenseWithIndexType, + SquareMatrixRowPermuteIntoDenseFailsForWrongPermutationSize) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto mtx = this->mtx5->create_submatrix(gko::span{0, 2}, gko::span{0, 2}); - auto abs_mtx = - gko::remove_complex::create(this->exec, gko::dim<2>{2, 2}, 4); - - mtx->compute_absolute(abs_mtx); + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); - GKO_ASSERT_MTX_NEAR(abs_mtx, l({{1.0, 1.0}, {2.0, 2.0}}), 0); - GKO_ASSERT_EQ(abs_mtx->get_stride(), 4); + ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, row_permute), + gko::ValueMismatch); } -TYPED_TEST(Dense, AppliesToComplex) +TYPED_TEST(DenseWithIndexType, + SquareMatrixRowPermuteIntoDenseFailsForWrongDimensions) { - using value_type = typename TestFixture::value_type; - using complex_type = gko::to_complex; - using Vec = gko::matrix::Dense; - auto exec = gko::ReferenceExecutor::create(); - auto b = - gko::initialize({{complex_type{1.0, 0.0}, complex_type{2.0, 1.0}}, - {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}, - {complex_type{3.0, 4.0}, complex_type{4.0, 5.0}}}, - exec); - auto x = Vec::create(exec, gko::dim<2>{2, 2}); - - this->mtx1->apply(b, x); + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; - GKO_ASSERT_MTX_NEAR( - x, - l({{complex_type{14.0, 16.0}, complex_type{20.0, 22.0}}, - {complex_type{17.0, 19.0}, complex_type{24.5, 26.5}}}), - 0.0); + ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, Mtx::create(exec)), + gko::DimensionMismatch); } -TYPED_TEST(Dense, AppliesToMixedComplex) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsColPermutable) { - using mixed_value_type = - gko::next_precision; - using mixed_complex_type = gko::to_complex; - using Vec = gko::matrix::Dense; - auto exec = gko::ReferenceExecutor::create(); - auto b = gko::initialize( - {{mixed_complex_type{1.0, 0.0}, mixed_complex_type{2.0, 1.0}}, - {mixed_complex_type{2.0, 2.0}, mixed_complex_type{3.0, 3.0}}, - {mixed_complex_type{3.0, 4.0}, mixed_complex_type{4.0, 5.0}}}, - exec); - auto x = 
Vec::create(exec, gko::dim<2>{2, 2}); + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; - this->mtx1->apply(b, x); + auto c_permute = gko::as(this->mtx5->column_permute(&permute_idxs)); GKO_ASSERT_MTX_NEAR( - x, - l({{mixed_complex_type{14.0, 16.0}, mixed_complex_type{20.0, 22.0}}, - {mixed_complex_type{17.0, 19.0}, mixed_complex_type{24.5, 26.5}}}), + c_permute, + l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), 0.0); } -TYPED_TEST(Dense, AdvancedAppliesToComplex) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsColPermutable) { + using Mtx = typename TestFixture::Mtx; using value_type = typename TestFixture::value_type; - using complex_type = gko::to_complex; - using Dense = gko::matrix::Dense; - using DenseComplex = gko::matrix::Dense; - auto exec = gko::ReferenceExecutor::create(); - - auto b = gko::initialize( - {{complex_type{1.0, 0.0}, complex_type{2.0, 1.0}}, - {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}, - {complex_type{3.0, 4.0}, complex_type{4.0, 5.0}}}, - exec); - auto x = gko::initialize( - {{complex_type{1.0, 0.0}, complex_type{2.0, 1.0}}, - {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}}, - exec); - auto alpha = gko::initialize({-1.0}, this->exec); - auto beta = gko::initialize({2.0}, this->exec); + using index_type = typename TestFixture::index_type; + auto exec = this->mtx4->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; - this->mtx1->apply(alpha, b, beta, x); + auto c_permute = gko::as(this->mtx4->column_permute(&permute_idxs)); - GKO_ASSERT_MTX_NEAR( - x, - l({{complex_type{-12.0, -16.0}, complex_type{-16.0, -20.0}}, - {complex_type{-13.0, -15.0}, complex_type{-18.5, -20.5}}}), - 0.0); + GKO_ASSERT_MTX_NEAR(c_permute, + l({{3.0, 2.0, 1.0}, {5.0, 0.0, 0.0}}), 0.0); } -TYPED_TEST(Dense, AdvancedAppliesToMixedComplex) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsColPermutableIntoDense) { - using mixed_value_type = - gko::next_precision; - using mixed_complex_type = gko::to_complex; - using MixedDense = gko::matrix::Dense; - using MixedDenseComplex = gko::matrix::Dense; - auto exec = gko::ReferenceExecutor::create(); - - auto b = gko::initialize( - {{mixed_complex_type{1.0, 0.0}, mixed_complex_type{2.0, 1.0}}, - {mixed_complex_type{2.0, 2.0}, mixed_complex_type{3.0, 3.0}}, - {mixed_complex_type{3.0, 4.0}, mixed_complex_type{4.0, 5.0}}}, - exec); - auto x = gko::initialize( - {{mixed_complex_type{1.0, 0.0}, mixed_complex_type{2.0, 1.0}}, - {mixed_complex_type{2.0, 2.0}, mixed_complex_type{3.0, 3.0}}}, - exec); - auto alpha = gko::initialize({-1.0}, this->exec); - auto beta = gko::initialize({2.0}, this->exec); + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; + auto c_permute = Mtx::create(exec, this->mtx5->get_size()); - this->mtx1->apply(alpha, b, beta, x); + this->mtx5->column_permute(&permute_idxs, c_permute); GKO_ASSERT_MTX_NEAR( - x, - l({{mixed_complex_type{-12.0, -16.0}, mixed_complex_type{-16.0, -20.0}}, - {mixed_complex_type{-13.0, -15.0}, - mixed_complex_type{-18.5, -20.5}}}), + c_permute, + l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), 0.0); } -TYPED_TEST(Dense, MakeComplex) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsColPermutableIntoDense) { - 
using T = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 0}}; + auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - auto complex_mtx = this->mtx5->make_complex(); + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->column_permute(&permute_idxs, c_permute); - GKO_ASSERT_MTX_NEAR(complex_mtx, this->mtx5, 0.0); + GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), + 0.0); + ASSERT_EQ(c_permute->get_stride(), 4); } -TYPED_TEST(Dense, MakeComplexIntoDense) +TYPED_TEST(DenseWithIndexType, + SquareMatrixColPermuteIntoDenseFailsForWrongPermutationSize) { - using T = typename TestFixture::value_type; - using ComplexMtx = typename TestFixture::ComplexMtx; + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); - auto complex_mtx = ComplexMtx::create(exec, this->mtx5->get_size()); - this->mtx5->make_complex(complex_mtx); + ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, row_permute), + gko::ValueMismatch); +} - GKO_ASSERT_MTX_NEAR(complex_mtx, this->mtx5, 0.0); + +TYPED_TEST(DenseWithIndexType, + SquareMatrixColPermuteIntoDenseFailsForWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, Mtx::create(exec)), + gko::DimensionMismatch); } -TYPED_TEST(Dense, MakeComplexIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseRowPermutable) { - using T = typename TestFixture::value_type; - using ComplexMtx = typename TestFixture::ComplexMtx; + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); + gko::array inverse_permute_idxs{exec, {1, 2, 0}}; - auto complex_mtx = ComplexMtx::create(exec); + auto inv_row_permute = + gko::as(this->mtx5->inverse_row_permute(&inverse_permute_idxs)); - ASSERT_THROW(this->mtx5->make_complex(complex_mtx), gko::DimensionMismatch); + GKO_ASSERT_MTX_NEAR( + inv_row_permute, + l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), + 0.0); } -TYPED_TEST(Dense, GetReal) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsInverseRowPermutable) { - using T = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx4->get_executor(); + gko::array inverse_permute_idxs{exec, {1, 0}}; - auto real_mtx = this->mtx5->get_real(); + auto inverse_row_permute = + gko::as(this->mtx4->inverse_row_permute(&inverse_permute_idxs)); - GKO_ASSERT_MTX_NEAR(real_mtx, this->mtx5, 0.0); + GKO_ASSERT_MTX_NEAR(inverse_row_permute, + l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); } -TYPED_TEST(Dense, GetRealIntoDense) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseRowPermutableIntoDense) { - using T = typename TestFixture::value_type; - using RealMtx = typename TestFixture::RealMtx; + using Mtx = typename TestFixture::Mtx; + using value_type = typename 
TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); - auto real_mtx = RealMtx::create(exec, this->mtx5->get_size()); - this->mtx5->get_real(real_mtx); + this->mtx5->inverse_row_permute(&permute_idxs, row_permute); - GKO_ASSERT_MTX_NEAR(real_mtx, this->mtx5, 0.0); + GKO_ASSERT_MTX_NEAR( + row_permute, + l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), + 0.0); } -TYPED_TEST(Dense, GetRealIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInverseRowPermutableIntoDense) { - using T = typename TestFixture::value_type; - using RealMtx = typename TestFixture::RealMtx; + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 0}}; + auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - auto real_mtx = RealMtx::create(exec); - ASSERT_THROW(this->mtx5->get_real(real_mtx), gko::DimensionMismatch); + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->inverse_row_permute(&permute_idxs, row_permute); + + GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), + 0.0); + ASSERT_EQ(row_permute->get_stride(), 4); } -TYPED_TEST(Dense, GetImag) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInverseRowPermuteIntoDenseFailsForWrongPermutationSize) { - using T = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); - auto imag_mtx = this->mtx5->get_imag(); + ASSERT_THROW(this->mtx5->inverse_row_permute(&permute_idxs, row_permute), + gko::ValueMismatch); +} - GKO_ASSERT_MTX_NEAR( - imag_mtx, l({{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}), - 0.0); + +TYPED_TEST(DenseWithIndexType, + SquareMatrixInverseRowPermuteIntoDenseFailsForWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW( + this->mtx5->inverse_row_permute(&permute_idxs, Mtx::create(exec)), + gko::DimensionMismatch); } -TYPED_TEST(Dense, GetImagIntoDense) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseColPermutable) { - using T = typename TestFixture::value_type; - using RealMtx = typename TestFixture::RealMtx; + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); + gko::array inverse_permute_idxs{exec, {1, 2, 0}}; - auto imag_mtx = RealMtx::create(exec, this->mtx5->get_size()); - this->mtx5->get_imag(imag_mtx); + auto inv_c_permute = + gko::as(this->mtx5->inverse_column_permute(&inverse_permute_idxs)); GKO_ASSERT_MTX_NEAR( - imag_mtx, l({{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}), + inv_c_permute, + l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), 0.0); } -TYPED_TEST(Dense, GetImagIntoDenseFailsForWrongDimensions) +TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsInverseColPermutable) { - using T = typename TestFixture::value_type; - using RealMtx = typename TestFixture::RealMtx; - auto exec = 
this->mtx5->get_executor(); + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx4->get_executor(); + gko::array inverse_permute_idxs{exec, {1, 2, 0}}; - auto imag_mtx = RealMtx::create(exec); - ASSERT_THROW(this->mtx5->get_imag(imag_mtx), gko::DimensionMismatch); + auto inverse_c_permute = + gko::as(this->mtx4->inverse_column_permute(&inverse_permute_idxs)); + + GKO_ASSERT_MTX_NEAR(inverse_c_permute, + l({{2.0, 1.0, 3.0}, {0.0, 0.0, 5.0}}), 0.0); } -TYPED_TEST(Dense, MakeTemporaryConversionDoesntConvertOnMatch) +TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseColPermutableIntoDense) { using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - auto alpha = gko::initialize({8.0}, this->exec); + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; + auto c_permute = Mtx::create(exec, this->mtx5->get_size()); - ASSERT_EQ(gko::make_temporary_conversion(alpha).get(), alpha.get()); + this->mtx5->inverse_column_permute(&permute_idxs, c_permute); + + GKO_ASSERT_MTX_NEAR( + c_permute, + l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), + 0.0); } -TYPED_TEST(Dense, MakeTemporaryConversionConvertsBack) +TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInverseColPermutableIntoDense) { - using MixedMtx = typename TestFixture::MixedMtx; - using T = typename TestFixture::value_type; - using MixedT = typename MixedMtx::value_type; - auto alpha = gko::initialize({8.0}, this->exec); + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 0}}; + auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - { - auto conversion = gko::make_temporary_conversion(alpha); - conversion->at(0, 0) = T{7.0}; - } + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->column_permute(&permute_idxs, c_permute); - ASSERT_EQ(alpha->at(0, 0), MixedT{7.0}); + GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), + 0.0); + ASSERT_EQ(c_permute->get_stride(), 4); } -TYPED_TEST(Dense, MakeTemporaryConversionConstDoesntConvertBack) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInverseColPermuteIntoDenseFailsForWrongPermutationSize) { - using MixedMtx = typename TestFixture::MixedMtx; - using T = typename TestFixture::value_type; - using MixedT = typename MixedMtx::value_type; - auto alpha = gko::initialize({8.0}, this->exec); - - { - auto conversion = gko::make_temporary_conversion( - static_cast(alpha.get())); - alpha->at(0, 0) = MixedT{7.0}; - } + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); - ASSERT_EQ(alpha->at(0, 0), MixedT{7.0}); + ASSERT_THROW(this->mtx5->inverse_column_permute(&permute_idxs, row_permute), + gko::ValueMismatch); } -TYPED_TEST(Dense, ScaleAddIdentityRectangular) +TYPED_TEST(DenseWithIndexType, + SquareMatrixInverseColPermuteIntoDenseFailsForWrongDimensions) { - using T = typename TestFixture::value_type; - using Vec = typename TestFixture::Mtx; - using MixedVec = typename TestFixture::MixedMtx; - auto alpha = gko::initialize({2.0}, this->exec); - 
auto beta = gko::initialize({-1.0}, this->exec); - auto b = gko::initialize( - {I{2.0, 0.0}, I{1.0, 2.5}, I{0.0, -4.0}}, this->exec); - - b->add_scaled_identity(alpha, beta); + using Mtx = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + gko::array permute_idxs{exec, {1, 2, 0}}; - GKO_ASSERT_MTX_NEAR(b, l({{0.0, 0.0}, {-1.0, -0.5}, {0.0, 4.0}}), 0.0); + ASSERT_THROW( + this->mtx5->inverse_column_permute(&permute_idxs, Mtx::create(exec)), + gko::DimensionMismatch); } From 07f01db4c8aa6c76d54db81e981eb91d4d5ffc7e Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 17 Oct 2023 09:52:27 +0200 Subject: [PATCH 383/583] review updates - Remove unused declarations - Consistent variable naming Co-authored-by: Marcel Koch --- reference/test/matrix/dense_kernels.cpp | 108 ++++++++++++------------ 1 file changed, 52 insertions(+), 56 deletions(-) diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 56f082243e6..60713c815de 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -68,7 +68,6 @@ class Dense : public ::testing::Test { using Mtx = gko::matrix::Dense; using MixedMtx = gko::matrix::Dense>; using ComplexMtx = gko::to_complex; - using MixedComplexMtx = gko::to_complex; using RealMtx = gko::remove_complex; Dense() : exec(gko::ReferenceExecutor::create()), @@ -1297,7 +1296,6 @@ TYPED_TEST(Dense, ScaleAddIdentityRectangular) { using T = typename TestFixture::value_type; using Vec = typename TestFixture::Mtx; - using MixedVec = typename TestFixture::MixedMtx; auto alpha = gko::initialize({2.0}, this->exec); auto beta = gko::initialize({-1.0}, this->exec); auto b = gko::initialize( @@ -1318,8 +1316,6 @@ class DenseWithIndexType typename std::tuple_element<0, decltype(ValueIndexType())>::type; using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; - - index_type invalid_index = gko::invalid_index(); }; TYPED_TEST_SUITE(DenseWithIndexType, gko::test::ValueIndexTypes, @@ -2501,10 +2497,10 @@ TYPED_TEST(DenseWithIndexType, SquareMatrixIsRowPermutable) auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2, 0}}; - auto row_permute = gko::as(this->mtx5->row_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->row_permute(&permute_idxs)); GKO_ASSERT_MTX_NEAR( - row_permute, + permuted, l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); } @@ -2518,9 +2514,9 @@ TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsRowPermutable) auto exec = this->mtx4->get_executor(); gko::array permute_idxs{exec, {1, 0}}; - auto row_permute = gko::as(this->mtx4->row_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx4->row_permute(&permute_idxs)); - GKO_ASSERT_MTX_NEAR(row_permute, + GKO_ASSERT_MTX_NEAR(permuted, l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); } @@ -2532,12 +2528,12 @@ TYPED_TEST(DenseWithIndexType, SquareMatrixIsRowPermutableIntoDense) using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2, 0}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - this->mtx5->row_permute(&permute_idxs, row_permute); + this->mtx5->row_permute(&permute_idxs, permuted); GKO_ASSERT_MTX_NEAR( - row_permute, + permuted, l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); } @@ -2550,14 +2546,14 @@ 
TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsRowPermutableIntoDense) using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 0}}; - auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->row_permute(&permute_idxs, row_permute); + ->row_permute(&permute_idxs, permuted); - GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), + GKO_ASSERT_MTX_NEAR(permuted, l({{-2.0, 2.0}, {1.0, -1.0}}), 0.0); - ASSERT_EQ(row_permute->get_stride(), 4); + ASSERT_EQ(permuted->get_stride(), 4); } @@ -2568,9 +2564,9 @@ TYPED_TEST(DenseWithIndexType, using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, row_permute), + ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, permuted), gko::ValueMismatch); } @@ -2596,10 +2592,10 @@ TYPED_TEST(DenseWithIndexType, SquareMatrixIsColPermutable) auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2, 0}}; - auto c_permute = gko::as(this->mtx5->column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->column_permute(&permute_idxs)); GKO_ASSERT_MTX_NEAR( - c_permute, + permuted, l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), 0.0); } @@ -2613,9 +2609,9 @@ TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsColPermutable) auto exec = this->mtx4->get_executor(); gko::array permute_idxs{exec, {1, 2, 0}}; - auto c_permute = gko::as(this->mtx4->column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx4->column_permute(&permute_idxs)); - GKO_ASSERT_MTX_NEAR(c_permute, + GKO_ASSERT_MTX_NEAR(permuted, l({{3.0, 2.0, 1.0}, {5.0, 0.0, 0.0}}), 0.0); } @@ -2627,12 +2623,12 @@ TYPED_TEST(DenseWithIndexType, SquareMatrixIsColPermutableIntoDense) using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2, 0}}; - auto c_permute = Mtx::create(exec, this->mtx5->get_size()); + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - this->mtx5->column_permute(&permute_idxs, c_permute); + this->mtx5->column_permute(&permute_idxs, permuted); GKO_ASSERT_MTX_NEAR( - c_permute, + permuted, l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), 0.0); } @@ -2645,14 +2641,14 @@ TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsColPermutableIntoDense) using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 0}}; - auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->column_permute(&permute_idxs, c_permute); + ->column_permute(&permute_idxs, permuted); - GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), + GKO_ASSERT_MTX_NEAR(permuted, l({{-1.0, 1.0}, {2.0, -2.0}}), 0.0); - ASSERT_EQ(c_permute->get_stride(), 4); + ASSERT_EQ(permuted->get_stride(), 4); } @@ -2663,9 +2659,9 @@ TYPED_TEST(DenseWithIndexType, using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + auto permuted = Mtx::create(exec, 
this->mtx5->get_size()); - ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, row_permute), + ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, permuted), gko::ValueMismatch); } @@ -2691,11 +2687,11 @@ TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseRowPermutable) auto exec = this->mtx5->get_executor(); gko::array inverse_permute_idxs{exec, {1, 2, 0}}; - auto inv_row_permute = + auto permuted = gko::as(this->mtx5->inverse_row_permute(&inverse_permute_idxs)); GKO_ASSERT_MTX_NEAR( - inv_row_permute, + permuted, l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), 0.0); } @@ -2709,10 +2705,10 @@ TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsInverseRowPermutable) auto exec = this->mtx4->get_executor(); gko::array inverse_permute_idxs{exec, {1, 0}}; - auto inverse_row_permute = + auto permuted = gko::as(this->mtx4->inverse_row_permute(&inverse_permute_idxs)); - GKO_ASSERT_MTX_NEAR(inverse_row_permute, + GKO_ASSERT_MTX_NEAR(permuted, l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); } @@ -2724,12 +2720,12 @@ TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseRowPermutableIntoDense) using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2, 0}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - this->mtx5->inverse_row_permute(&permute_idxs, row_permute); + this->mtx5->inverse_row_permute(&permute_idxs, permuted); GKO_ASSERT_MTX_NEAR( - row_permute, + permuted, l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), 0.0); } @@ -2742,14 +2738,14 @@ TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInverseRowPermutableIntoDense) using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 0}}; - auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->inverse_row_permute(&permute_idxs, row_permute); + ->inverse_row_permute(&permute_idxs, permuted); - GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), + GKO_ASSERT_MTX_NEAR(permuted, l({{-2.0, 2.0}, {1.0, -1.0}}), 0.0); - ASSERT_EQ(row_permute->get_stride(), 4); + ASSERT_EQ(permuted->get_stride(), 4); } @@ -2760,9 +2756,9 @@ TYPED_TEST(DenseWithIndexType, using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - ASSERT_THROW(this->mtx5->inverse_row_permute(&permute_idxs, row_permute), + ASSERT_THROW(this->mtx5->inverse_row_permute(&permute_idxs, permuted), gko::ValueMismatch); } @@ -2789,11 +2785,11 @@ TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseColPermutable) auto exec = this->mtx5->get_executor(); gko::array inverse_permute_idxs{exec, {1, 2, 0}}; - auto inv_c_permute = + auto permuted = gko::as(this->mtx5->inverse_column_permute(&inverse_permute_idxs)); GKO_ASSERT_MTX_NEAR( - inv_c_permute, + permuted, l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), 0.0); } @@ -2807,10 +2803,10 @@ TYPED_TEST(DenseWithIndexType, NonSquareMatrixIsInverseColPermutable) auto exec = this->mtx4->get_executor(); gko::array inverse_permute_idxs{exec, {1, 2, 0}}; - auto inverse_c_permute = + auto permuted = gko::as(this->mtx4->inverse_column_permute(&inverse_permute_idxs)); - 
GKO_ASSERT_MTX_NEAR(inverse_c_permute, + GKO_ASSERT_MTX_NEAR(permuted, l({{2.0, 1.0, 3.0}, {0.0, 0.0, 5.0}}), 0.0); } @@ -2822,12 +2818,12 @@ TYPED_TEST(DenseWithIndexType, SquareMatrixIsInverseColPermutableIntoDense) using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2, 0}}; - auto c_permute = Mtx::create(exec, this->mtx5->get_size()); + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - this->mtx5->inverse_column_permute(&permute_idxs, c_permute); + this->mtx5->inverse_column_permute(&permute_idxs, permuted); GKO_ASSERT_MTX_NEAR( - c_permute, + permuted, l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), 0.0); } @@ -2840,14 +2836,14 @@ TYPED_TEST(DenseWithIndexType, SquareSubmatrixIsInverseColPermutableIntoDense) using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 0}}; - auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); this->mtx5->create_submatrix({0, 2}, {0, 2}) - ->column_permute(&permute_idxs, c_permute); + ->column_permute(&permute_idxs, permuted); - GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), + GKO_ASSERT_MTX_NEAR(permuted, l({{-1.0, 1.0}, {2.0, -2.0}}), 0.0); - ASSERT_EQ(c_permute->get_stride(), 4); + ASSERT_EQ(permuted->get_stride(), 4); } @@ -2858,9 +2854,9 @@ TYPED_TEST(DenseWithIndexType, using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); gko::array permute_idxs{exec, {1, 2}}; - auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - ASSERT_THROW(this->mtx5->inverse_column_permute(&permute_idxs, row_permute), + ASSERT_THROW(this->mtx5->inverse_column_permute(&permute_idxs, permuted), gko::ValueMismatch); } From c459676141f8dc7ca0a16ad802bd109259169b03 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 14:39:38 +0200 Subject: [PATCH 384/583] Add batch::matrix::Ell class and core Co-authored-by: Aditya Kashi --- core/matrix/batch_ell.cpp | 235 ++++++++++++++ include/ginkgo/core/matrix/batch_ell.hpp | 390 +++++++++++++++++++++++ 2 files changed, 625 insertions(+) create mode 100644 core/matrix/batch_ell.cpp create mode 100644 include/ginkgo/core/matrix/batch_ell.hpp diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp new file mode 100644 index 00000000000..63d4f0dda8a --- /dev/null +++ b/core/matrix/batch_ell.cpp @@ -0,0 +1,235 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_ell_kernels.hpp" + + +namespace gko { +namespace batch { +namespace matrix { +namespace ell { +namespace { + + +GKO_REGISTER_OPERATION(simple_apply, batch_ell::simple_apply); +GKO_REGISTER_OPERATION(advanced_apply, batch_ell::advanced_apply); + + +} // namespace +} // namespace ell + + +namespace detail { + + +template +batch_dim<2> compute_batch_size( + const std::vector*>& matrices) +{ + auto common_size = matrices[0]->get_size(); + for (size_type i = 1; i < matrices.size(); ++i) { + GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); + } + return batch_dim<2>{matrices.size(), common_size}; +} + + +} // namespace detail + + +template +std::unique_ptr> +Ell::create_view_for_item(size_type item_id) +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create( + exec, this->get_common_size(), + make_array_view(exec, this->get_num_elements_per_item(), + this->get_values_for_item(item_id)), + make_array_view(exec, this->get_num_elements_per_item(), + this->get_col_idxs_for_item(item_id)), + this->get_num_stored_elements_per_row(), stride); + return mat; +} + + +template +std::unique_ptr> +Ell::create_const_view_for_item(size_type item_id) const +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create_const( + exec, this->get_common_size(), + make_const_array_view(exec, this->get_num_elements_per_item(), + this->get_const_values_for_item(item_id)), + make_const_array_view(exec, this->get_num_elements_per_item(), + this->get_const_col_idxs_for_item(item_id)), + this->get_num_stored_elements_per_row(), stride); + return mat; +} + + +template +std::unique_ptr> +Ell::create_with_config_of( + ptr_param> other) +{ + // De-referencing `other` before calling the functions (instead of + // using operator `->`) is currently required to be compatible with + // CUDA 10.1. + // Otherwise, it results in a compile error. 
+ return (*other).create_with_same_config(); +} + + +template +std::unique_ptr> +Ell::create_with_same_config() const +{ + return Ell::create( + this->get_executor(), this->get_size(), + this->get_num_stored_elements_per_row()); +} + + +template +std::unique_ptr> +Ell::create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + int num_elems_per_row, gko::detail::const_array_view&& values, + gko::detail::const_array_view&& col_idxs) +{ + // cast const-ness away, but return a const object afterwards, + // so we can ensure that no modifications take place. + return std::unique_ptr( + new Ell{exec, sizes, num_elems_per_row, + gko::detail::array_const_cast(std::move(values)), + gko::detail::array_const_cast(std::move(col_idxs))}); +} + + +inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) +{ + return batch_dim<2>(sizes.get_num_batch_items(), + dim<2>(1, sizes.get_common_size()[1])); +} + + +template +Ell::Ell(std::shared_ptr exec, + const batch_dim<2>& size, int num_elems_per_row) + : EnableBatchLinOp>(exec, size), + num_elems_per_row_(num_elems_per_row), + values_(exec, compute_num_elems(size, num_elems_per_row)), + col_idxs_(exec, compute_num_elems(size, num_elems_per_row)) +{} + + +template +void Ell::apply_impl(const MultiVector* b, + MultiVector* x) const +{ + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + this->get_executor()->run(ell::make_simple_apply(this, b, x)); +} + + +template +void Ell::apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const +{ + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), gko::dim<2>(1, 1)); + GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); + this->get_executor()->run( + ell::make_advanced_apply(alpha, this, b, beta, x)); +} + + +template +void Ell::convert_to( + Ell>* result) const +{ + result->values_ = this->values_; + result->col_idxs_ = this->col_idxs_; + result->num_elems_per_row_ = this->num_elems_per_row_; + result->set_size(this->get_size()); +} + + +template +void Ell::move_to( + Ell>* result) +{ + this->convert_to(result); +} + + +#define GKO_DECLARE_BATCH_ELL_MATRIX(_type) class Ell<_vtype, _itype> +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_MATRIX); + + +} // namespace matrix +} // namespace batch +} // namespace gko diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp new file mode 100644 index 00000000000..374f1479664 --- /dev/null +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -0,0 +1,390 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_ELL_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_BATCH_ELL_HPP_ + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace batch { +namespace matrix { + + +/** + * Ell is a batch matrix format which explicitly stores all values of the + * matrix in each of the batches. + * + * The values in each of the batches are stored in row-major format (values + * belonging to the same row appear consecutive in the memory and the values of + * each batch item are also stored consecutively in memory). + * + * @note Though the storage layout is similar to the multi-vector object, the + * class semantics and the operations it aims to provide is different. Hence it + * is recommended to create multi-vector objects if the user means to view the + * data as a set of vectors. + * + * @tparam ValueType precision of matrix elements + * + * @ingroup batch_ell + * @ingroup mat_formats + * @ingroup BatchLinOp + */ +template +class Ell final + : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo, IndexType>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class Ell, IndexType>; + friend class Ell, IndexType>; + +public: + using EnableBatchLinOp::convert_to; + using EnableBatchLinOp::move_to; + + using value_type = ValueType; + using index_type = int32; + using transposed_type = Ell; + using unbatch_type = gko::matrix::Ell; + using absolute_type = remove_complex; + using complex_type = to_complex; + + /** + * Creates a Ell matrix with the configuration of another Ell + * matrix. + * + * @param other The other matrix whose configuration needs to copied. + */ + static std::unique_ptr create_with_config_of( + ptr_param other); + + void convert_to( + Ell, IndexType>* result) const override; + + void move_to(Ell, IndexType>* result) override; + + /** + * Creates a mutable view (of matrix::Ell type) of one item of the + * batch::matrix::Ell object. Does not perform any deep + * copies, but only returns a view of the data. 
+ * + * @param item_id The index of the batch item + * + * @return a batch::matrix::Ell object with the data from the batch item + * at the given index. + */ + std::unique_ptr create_view_for_item(size_type item_id); + + /** + * @copydoc create_view_for_item(size_type) + */ + std::unique_ptr create_const_view_for_item( + size_type item_id) const; + + /** + * Returns a pointer to the array of values of the matrix + * + * @return the pointer to the array of values + */ + value_type* get_values() noexcept { return values_.get_data(); } + + /** + * @copydoc get_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * Returns a pointer to the array of column indices of the matrix + * + * @return the pointer to the array of column indices + */ + index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); } + + /** + * @copydoc get_col_idxs() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type* get_const_col_idxs() const noexcept + { + return col_idxs_.get_const_data(); + } + + /** + * Returns the number of elements per row explicitly stored. + * + * @return the number of elements stored in each row of the ELL matrix. Same + * for each batch item + */ + int get_num_stored_elements_per_row() const noexcept + { + return num_elems_per_row_; + } + + /** + * Returns the number of elements explicitly stored in the batch matrix, + * cumulative across all the batch items. + * + * @return the number of elements explicitly stored in the vector, + * cumulative across all the batch items + */ + size_type get_num_stored_elements() const noexcept + { + return values_.get_num_elems(); + } + + /** + * Returns the number of stored elements in each batch item. + * + * @return the number of stored elements per batch item. + */ + size_type get_num_elements_per_item() const noexcept + { + return this->get_num_stored_elements() / this->get_num_batch_items(); + } + + /** + * Returns a pointer to the array of col_idxs of the matrix for a + * specific batch item. + * + * @param batch_id the id of the batch item. + * + * @return the pointer to the array of col_idxs + */ + value_type* get_col_idxs_for_item(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return col_idxs_.get_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * @copydoc get_col_idxs_for_item(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_col_idxs_for_item( + size_type batch_id) const noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return col_idxs_.get_const_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * Returns a pointer to the array of values of the matrix for a + * specific batch item. + * + * @param batch_id the id of the batch item. 
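The *_for_item accessors documented here amount to pointer arithmetic: each batch item owns num_rows * num_stored_elems_per_row consecutive entries of the shared values and col_idxs arrays, so item batch_id starts at offset batch_id * num_elements_per_item. A minimal standalone sketch of that idea follows; item_view and extract_item are invented names, not part of the Ginkgo API.

#include <cassert>
#include <cstddef>

// Non-owning view of one batch item's data; illustrative only.
template <typename ValueType, typename IndexType>
struct item_view {
    ValueType* values;
    const IndexType* col_idxs;
};

template <typename ValueType, typename IndexType>
item_view<ValueType, IndexType> extract_item(ValueType* values,
                                             const IndexType* col_idxs,
                                             std::size_t num_batch_items,
                                             std::size_t num_elems_per_item,
                                             std::size_t batch_id)
{
    assert(batch_id < num_batch_items);
    // Each item stores num_elems_per_item consecutive entries.
    return {values + batch_id * num_elems_per_item,
            col_idxs + batch_id * num_elems_per_item};
}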
+ * + * @return the pointer to the array of values + */ + value_type* get_values_for_item(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * @copydoc get_values_for_item(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * Creates a constant (immutable) batch ell matrix from a constant + * array. + * + * @param exec the executor to create the matrix on + * @param size the dimensions of the matrix + * @param num_elems_per_row the number of elements to be stored in each row + * @param values the value array of the matrix + * @param col_idxs the col_idxs array of the matrix + * + * @return A smart pointer to the constant matrix wrapping the input + * array (if it resides on the same executor as the matrix) or a copy of the + * array on the correct executor. + */ + static std::unique_ptr> create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + const int num_elems_per_row, + gko::detail::const_array_view&& values, + gko::detail::const_array_view&& col_idxs); + + /** + * Apply the matrix to a multi-vector. Represents the matrix vector + * multiplication, x = A * b, where x and b are both multi-vectors. + * + * @param b the multi-vector to be applied to + * @param x the output multi-vector + */ + void apply(const MultiVector* b, + MultiVector* x) const + { + this->apply_impl(b, x); + } + + /** + * Apply the matrix to a multi-vector with a linear combination of the given + * input vector. Represents the matrix vector multiplication, x = alpha* A * + * b + beta * x, where x and b are both multi-vectors. + * + * @param alpha the scalar to scale the matrix-vector product with + * @param b the multi-vector to be applied to + * @param beta the scalar to scale the x vector with + * @param x the output multi-vector + */ + void apply(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const + { + this->apply_impl(alpha, b, beta, x); + } + +private: + size_type compute_num_elems(const batch_dim<2>& size, int num_elems_per_row) + { + return size->get_common_size()[0] * num_elems_per_row; + } + + +protected: + /** + * Creates an uninitialized Ell matrix of the specified size. + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param num_elems_per_row the number of elements to be stored in each row + */ + Ell(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}, + const int num_elems_per_row = 0); + + /** + * Creates a Ell matrix from an already allocated (and initialized) + * array. + * + * @tparam ValuesArray type of array of values + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param num_elems_per_row the number of elements to be stored in each row + * @param values array of matrix values + * @param col_idxs the col_idxs array of the matrix + * + * @note If `values` is not an rvalue, not an array of ValueType, or is on + * the wrong executor, an internal copy will be created, and the + * original array data will not be used in the matrix. 
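The array-based constructor documented above (and defined just below) has to guarantee that the caller-provided arrays are large enough for the requested configuration: num_rows * num_elems_per_row stored entries per item, times the number of batch items. A standalone sketch of that size check is shown here; it is not the Ginkgo implementation, which performs the equivalent check with GKO_ENSURE_IN_BOUNDS on gko::array sizes.

#include <cstddef>
#include <stdexcept>
#include <vector>

// Illustrative size check for an array-based batch ELL constructor.
template <typename ValueType, typename IndexType>
void check_batch_ell_array_sizes(const std::vector<ValueType>& values,
                                 const std::vector<IndexType>& col_idxs,
                                 std::size_t num_batch_items,
                                 std::size_t num_rows,
                                 std::size_t num_elems_per_row)
{
    const std::size_t required =
        num_batch_items * num_rows * num_elems_per_row;
    if (values.size() < required || col_idxs.size() < required) {
        throw std::out_of_range(
            "values/col_idxs arrays are too small for this batch ELL size");
    }
}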
+ */ + template + Ell(std::shared_ptr exec, const batch_dim<2>& size, + const int num_elems_per_row, ValuesArray&& values, + IndicesArray&& col_idxs) + : EnableBatchLinOp(exec, size), + num_elems_per_row_{num_elems_per_row}, + values_{exec, std::forward(values)}, + col_idxs_{exec, std::forward(col_idxs)} + { + // Ensure that the value and col_idxs arrays have the correct size + auto num_elems = this->get_size()[0] * num_elems_per_row() * + this->get_num_batch_items(); + GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); + GKO_ENSURE_IN_BOUNDS(num_elems, col_idxs_.get_num_elems() + 1); + } + + /** + * Creates a Ell matrix with the same configuration as the callers + * matrix. + * + * @returns a Ell matrix with the same configuration as the caller. + */ + std::unique_ptr create_with_same_config() const; + + void apply_impl(const MultiVector* b, + MultiVector* x) const; + + void apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const; + +private: + int num_elems_per_row_; + array values_; + array col_idxs_; +}; + + +} // namespace matrix +} // namespace batch +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_ELL_HPP_ From 79fb2c12bbdba47e19246e96f61b759f1b80cc84 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 15:13:34 +0200 Subject: [PATCH 385/583] Add ref, omp kernels and scaffold Co-authored-by: Aditya Kashi --- core/CMakeLists.txt | 1 + core/device_hooks/common_kernels.inc.cpp | 10 + core/matrix/batch_ell_kernels.hpp | 84 ++++ core/test/matrix/CMakeLists.txt | 1 + core/test/matrix/batch_ell.cpp | 478 +++++++++++++++++++++ cuda/CMakeLists.txt | 1 + cuda/matrix/batch_ell_kernels.cu | 86 ++++ dpcpp/CMakeLists.txt | 1 + dpcpp/matrix/batch_ell_kernels.dp.cpp | 102 +++++ hip/CMakeLists.txt | 1 + hip/matrix/batch_ell_kernels.hip.cpp | 86 ++++ omp/CMakeLists.txt | 1 + omp/matrix/batch_ell_kernels.cpp | 117 +++++ reference/CMakeLists.txt | 1 + reference/matrix/batch_ell_kernels.cpp | 116 +++++ reference/matrix/batch_ell_kernels.hpp.inc | 78 ++++ 16 files changed, 1164 insertions(+) create mode 100644 core/matrix/batch_ell_kernels.hpp create mode 100644 core/test/matrix/batch_ell.cpp create mode 100644 cuda/matrix/batch_ell_kernels.cu create mode 100644 dpcpp/matrix/batch_ell_kernels.dp.cpp create mode 100644 hip/matrix/batch_ell_kernels.hip.cpp create mode 100644 omp/matrix/batch_ell_kernels.cpp create mode 100644 reference/matrix/batch_ell_kernels.cpp create mode 100644 reference/matrix/batch_ell_kernels.hpp.inc diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 46ea67abc65..ae8035bcbf9 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -40,6 +40,7 @@ target_sources(ginkgo log/record.cpp log/stream.cpp matrix/batch_dense.cpp + matrix/batch_ell.cpp matrix/coo.cpp matrix/csr.cpp matrix/dense.cpp diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 87cab3dcf0b..b685063da10 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -310,6 +310,16 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); } // namespace batch_dense +namespace batch_ell { + + +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_ell + + namespace dense { diff --git a/core/matrix/batch_ell_kernels.hpp b/core/matrix/batch_ell_kernels.hpp new file mode 100644 index 
00000000000..1b1ef345ae0 --- /dev/null +++ b/core/matrix/batch_ell_kernels.hpp @@ -0,0 +1,84 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_BATCH_ELL_KERNELS_HPP_ +#define GKO_CORE_MATRIX_BATCH_ELL_KERNELS_HPP_ + + +#include + + +#include +#include +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL(_vtype, _itype) \ + void simple_apply(std::shared_ptr exec, \ + const batch::matrix::Ell<_vtype, _itype>* a, \ + const batch::MultiVector<_vtype, _itype>* b, \ + batch::MultiVector<_vtype, _itype>* c) + +#define GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL(_vtype, _itype) \ + void advanced_apply(std::shared_ptr exec, \ + const batch::MultiVector<_vtype, _itype>* alpha, \ + const batch::matrix::Ell<_vtype, _itype>* a, \ + const batch::MultiVector<_vtype, _itype>* b, \ + const batch::MultiVector<_vtype, _itype>* beta, \ + batch::MultiVector<_vtype, _itype>* c) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL(ValueType, IndexType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_ell, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_BATCH_ELL_KERNELS_HPP_ diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index cca4b8da1c0..ec7ef93e517 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_test(batch_dense) +ginkgo_create_test(batch_ell) ginkgo_create_test(coo) ginkgo_create_test(coo_builder) ginkgo_create_test(csr) diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp new file mode 100644 index 00000000000..931efb47d2e --- /dev/null +++ 
b/core/test/matrix/batch_ell.cpp @@ -0,0 +1,478 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/batch_utilities.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/batch_helpers.hpp" + + +template +class Ell : public ::testing::Test { +protected: + using value_type = T; + using EllMtx = gko::matrix::Ell; + using size_type = gko::size_type; + Ell() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::batch::initialize>( + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)), + mvec(gko::batch::initialize>( + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)), + ell_mtx(gko::initialize>( + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)) + {} + + + static void assert_equal_to_original_mtx( + gko::batch::matrix::Ell* m) + { + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3)); + EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{-1.5}); + EXPECT_EQ(m->at(0, 1, 1), value_type{2.5}); + ASSERT_EQ(m->at(0, 1, 2), value_type{3.5}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.5}); + EXPECT_EQ(m->at(1, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(1, 1, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{2.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); + } + + static void assert_empty(gko::batch::matrix::Ell* m) + { + ASSERT_EQ(m->get_num_batch_items(), 0); + ASSERT_EQ(m->get_num_stored_elements(), 0); + } + + std::shared_ptr exec; + std::unique_ptr> mtx; + std::unique_ptr> mvec; + std::unique_ptr> ell_mtx; +}; + +TYPED_TEST_SUITE(Ell, gko::test::ValueTypes); + + +TYPED_TEST(Ell, 
KnowsItsSizeAndValues) +{ + this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(Ell, CanBeEmpty) +{ + auto empty = gko::batch::matrix::Ell::create(this->exec); + this->assert_empty(empty.get()); +} + + +TYPED_TEST(Ell, ReturnsNullValuesArrayWhenEmpty) +{ + auto empty = gko::batch::matrix::Ell::create(this->exec); + ASSERT_EQ(empty->get_const_values(), nullptr); +} + + +TYPED_TEST(Ell, CanGetValuesForEntry) +{ + using value_type = typename TestFixture::value_type; + + ASSERT_EQ(this->mtx->get_values_for_item(1)[0], value_type{1.0}); +} + + +TYPED_TEST(Ell, CanCreateEllItemView) +{ + GKO_ASSERT_MTX_NEAR(this->mtx->create_view_for_item(1), this->ell_mtx, 0.0); +} + + +TYPED_TEST(Ell, CanCreateMultiVectorView) +{ + GKO_ASSERT_BATCH_MTX_NEAR(this->mtx->create_multi_vector_view(), this->mvec, + 0.0); +} + + +TYPED_TEST(Ell, CanBeCopied) +{ + auto mtx_copy = gko::batch::matrix::Ell::create(this->exec); + + mtx_copy->copy_from(this->mtx.get()); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->at(0, 0, 0) = 7; + this->mtx->at(0, 1) = 7; + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(Ell, CanBeMoved) +{ + auto mtx_copy = gko::batch::matrix::Ell::create(this->exec); + + this->mtx->move_to(mtx_copy); + + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(Ell, CanBeCloned) +{ + auto mtx_clone = this->mtx->clone(); + + this->assert_equal_to_original_mtx( + dynamic_castmtx.get())>(mtx_clone.get())); +} + + +TYPED_TEST(Ell, CanBeCleared) +{ + this->mtx->clear(); + + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(Ell, CanBeConstructedWithSize) +{ + using size_type = gko::size_type; + + auto m = gko::batch::matrix::Ell::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3})); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 30); +} + + +TYPED_TEST(Ell, CanBeConstructedFromExistingData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + value_type data[] = { + 1.0, 2.0, + -1.0, 3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; + // clang-format on + + auto m = gko::batch::matrix::Ell::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::view(this->exec, 8, data)); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); +} + + +TYPED_TEST(Ell, CanBeConstructedFromExistingConstData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + const value_type data[] = { + 1.0, 2.0, + -1.0, 3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; + // clang-format on + + auto m = gko::batch::matrix::Ell::create_const( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::const_view(this->exec, 8, data)); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); + ASSERT_EQ(m->at(1, 0, 1), 
value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); +} + + +TYPED_TEST(Ell, CanBeConstructedFromEllMatrices) +{ + using value_type = typename TestFixture::value_type; + using EllMtx = typename TestFixture::EllMtx; + using size_type = gko::size_type; + + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + + auto m = gko::batch::create_from_item>( + this->exec, std::vector{mat1.get(), mat2.get()}); + + this->assert_equal_to_original_mtx(m.get()); +} + + +TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) +{ + using value_type = typename TestFixture::value_type; + using EllMtx = typename TestFixture::EllMtx; + using size_type = gko::size_type; + + auto mat1 = gko::initialize(4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + + auto bat_m = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = gko::batch::create_from_item>( + this->exec, 3, mat1.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); +} + + +TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) +{ + using value_type = typename TestFixture::value_type; + using EllMtx = typename TestFixture::EllMtx; + using size_type = gko::size_type; + + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + + auto m = gko::batch::create_from_item>( + this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), + mat1.get(), mat2.get()}); + + auto m2 = gko::batch::duplicate>( + this->exec, 3, m.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); +} + + +TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices) +{ + using value_type = typename TestFixture::value_type; + using EllMtx = typename TestFixture::EllMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize(4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + + auto ell_mats = gko::batch::unbatch>( + this->mtx.get()); + + GKO_ASSERT_MTX_NEAR(ell_mats[0].get(), mat1.get(), 0.); + GKO_ASSERT_MTX_NEAR(ell_mats[1].get(), mat2.get(), 0.); +} + + +TYPED_TEST(Ell, CanBeListConstructed) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch::initialize>( + {{1.0, 2.0}, {1.0, 3.0}}, this->exec); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); + EXPECT_EQ(m->at(0, 0), value_type{1}); + EXPECT_EQ(m->at(0, 1), value_type{2}); + EXPECT_EQ(m->at(1, 0), value_type{1}); + EXPECT_EQ(m->at(1, 1), value_type{3}); +} + + +TYPED_TEST(Ell, CanBeListConstructedByCopies) +{ + using value_type = typename TestFixture::value_type; + + auto m = gko::batch::initialize>( + 2, I({1.0, 2.0}), this->exec); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.0}); +} + + +TYPED_TEST(Ell, CanBeDoubleListConstructed) +{ + using value_type = typename 
TestFixture::value_type; + using T = value_type; + + auto m = gko::batch::initialize>( + {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, + {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, + this->exec); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 3)); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{1.0}); + EXPECT_EQ(m->at(0, 2), value_type{0.0}); + ASSERT_EQ(m->at(0, 3), value_type{2.0}); + EXPECT_EQ(m->at(0, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 3), value_type{3.0}); + EXPECT_EQ(m->at(1, 4), value_type{4.0}); +} + + +TYPED_TEST(Ell, CanBeReadFromMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = int; + + auto vec_data = std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 0, 0.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); + + auto m = gko::batch::read>(this->exec, + vec_data); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); +} + + +TYPED_TEST(Ell, CanBeReadFromSparseMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = int; + auto vec_data = std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); + + auto m = gko::batch::read>(this->exec, + vec_data); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); +} + + +TYPED_TEST(Ell, GeneratesCorrectMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = int; + using tpl = typename gko::matrix_data::nonzero_type; + + auto data = + gko::batch::write>(this->mtx.get()); + + ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[0].nonzeros.size(), 6); + EXPECT_EQ(data[0].nonzeros[0], tpl(0, 0, value_type{-1.0})); + EXPECT_EQ(data[0].nonzeros[1], tpl(0, 1, value_type{2.0})); + EXPECT_EQ(data[0].nonzeros[2], tpl(0, 2, value_type{3.0})); + EXPECT_EQ(data[0].nonzeros[3], tpl(1, 0, value_type{-1.5})); + EXPECT_EQ(data[0].nonzeros[4], tpl(1, 1, value_type{2.5})); + EXPECT_EQ(data[0].nonzeros[5], tpl(1, 2, value_type{3.5})); + ASSERT_EQ(data[1].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[1].nonzeros.size(), 6); + EXPECT_EQ(data[1].nonzeros[0], tpl(0, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[1], tpl(0, 1, value_type{2.5})); + EXPECT_EQ(data[1].nonzeros[2], tpl(0, 2, value_type{3.0})); + EXPECT_EQ(data[1].nonzeros[3], tpl(1, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0})); + 
EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0})); +} diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index dfa1b2177ee..f5b7932ed39 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -39,6 +39,7 @@ target_sources(ginkgo_cuda factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu matrix/batch_dense_kernels.cu + matrix/batch_ell_kernels.cu matrix/coo_kernels.cu ${CSR_INSTANTIATE} matrix/dense_kernels.cu diff --git a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu new file mode 100644 index 00000000000..c41b436daed --- /dev/null +++ b/cuda/matrix/batch_ell_kernels.cu @@ -0,0 +1,86 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include +#include + + +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/cublas_bindings.hpp" +#include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/base/thrust.cuh" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" +#include "cuda/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The Ell matrix format namespace. 
+ * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +constexpr auto default_block_size = 256; +constexpr int sm_oversubscription = 4; + +// clang-format off + +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES + +#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" + + +#include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" + +// clang-format on + + +} // namespace batch_ell +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 9990496c98f..9c2e799ede9 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -37,6 +37,7 @@ target_sources(ginkgo_dpcpp factorization/par_ilut_spgeam_kernel.dp.cpp factorization/par_ilut_sweep_kernel.dp.cpp matrix/batch_dense_kernels.dp.cpp + matrix/batch_ell_kernels.dp.cpp matrix/coo_kernels.dp.cpp matrix/csr_kernels.dp.cpp matrix/fbcsr_kernels.dp.cpp diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp new file mode 100644 index 00000000000..f886b7dd790 --- /dev/null +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -0,0 +1,102 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/components/prefix_sum_kernels.hpp" +#include "core/matrix/batch_struct.hpp" +#include "dpcpp/base/batch_struct.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/intrinsics.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +// #include "dpcpp/matrix/batch_dense_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_ell +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 21b573b6cd0..ccc88769a4e 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -36,6 +36,7 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp matrix/batch_dense_kernels.hip.cpp + matrix/batch_ell_kernels.hip.cpp matrix/coo_kernels.hip.cpp ${CSR_INSTANTIATE} matrix/dense_kernels.hip.cpp diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp new file mode 100644 index 00000000000..c41b436daed --- /dev/null +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -0,0 +1,86 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include +#include + + +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/cublas_bindings.hpp" +#include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/base/thrust.cuh" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" +#include "cuda/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +constexpr auto default_block_size = 256; +constexpr int sm_oversubscription = 4; + +// clang-format off + +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES + +#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" + + +#include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" + +// clang-format on + + +} // namespace batch_ell +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index d87399492f5..aa8e30cd590 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -24,6 +24,7 @@ target_sources(ginkgo_omp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp matrix/batch_dense_kernels.cpp + matrix/batch_ell_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/omp/matrix/batch_ell_kernels.cpp b/omp/matrix/batch_ell_kernels.cpp new file mode 100644 index 00000000000..282920c05f3 --- /dev/null +++ b/omp/matrix/batch_ell_kernels.cpp @@ -0,0 +1,117 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +#include "reference/matrix/batch_ell_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_ell +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 37498588ca7..21dfc0dfb5a 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -26,6 +26,7 @@ target_sources(ginkgo_reference factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp matrix/batch_dense_kernels.cpp + matrix/batch_ell_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/reference/matrix/batch_ell_kernels.cpp b/reference/matrix/batch_ell_kernels.cpp new file mode 100644 
index 00000000000..1fab322dc5f --- /dev/null +++ b/reference/matrix/batch_ell_kernels.cpp @@ -0,0 +1,116 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The Ell matrix format namespace. 
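The OpenMP backend above and the reference kernels that follow share the same host-side structure: a loop over the independent batch items that extracts per-item views and hands them to a small per-item kernel, with the OpenMP version only adding a parallel-for pragma on that loop. A condensed standalone sketch of the pattern is given below; for_each_batch_item is an invented helper, not Ginkgo API.

#include <cstddef>

// Run a per-item kernel once for every independent batch item. In the OpenMP
// backend the same loop simply gains `#pragma omp parallel for`.
template <typename PerItemKernel>
void for_each_batch_item(std::size_t num_batch_items, PerItemKernel per_item)
{
    for (std::size_t batch = 0; batch < num_batch_items; ++batch) {
        // e.g. extract the item's views and call simple_apply_kernel on them
        per_item(batch);
    }
}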
+ * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +#include "reference/matrix/batch_ell_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_ell +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc new file mode 100644 index 00000000000..1874d1db9f3 --- /dev/null +++ b/reference/matrix/batch_ell_kernels.hpp.inc @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +template +inline void simple_apply_kernel( + const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& c) +{ + for (int row = 0; row < a.num_rows; ++row) { + for (int j = 0; j < b.num_rhs; ++j) { + c.values[row * c.stride + j] = zero(); + } + for (auto k = 0; k < a.num_stored_elems_per_row; ++k) { + auto val = a.values[row + k * a.stride]; + auto col = a.col_idxs[row + k * a.stride]; + for (int j = 0; j < b.num_rhs; ++j) { + c.values[row * c.stride + j] += + val * b.values[col * b.stride + j]; + } + } + } +} + + +template +inline void advanced_apply_kernel( + const ValueType alpha, + const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::multi_vector::batch_item& b, + const ValueType beta, + const gko::batch::multi_vector::batch_item& c) +{ + for (int row = 0; row < a.num_rows; ++row) { + for (int j = 0; j < c.num_rhs; ++j) { + c.values[row * c.stride + j] *= beta; + } + for (auto k = 0; k < a.num_stored_elems_per_row; ++k) { + auto val = a.values[row + k * a.stride]; + auto col = a.col_idxs[row + k * a.stride]; + for (int j = 0; j < b.num_rhs; ++j) { + c.values[row * c.stride + j] += + alpha * val * b.values[col * b.stride + j]; + } + } + } +} From e0683e489f5044ac4f3de241c1c767665abb1c40 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 17:19:37 +0200 Subject: [PATCH 386/583] Use only int32 --- .../matrix/batch_ell_kernel_launcher.hpp.inc | 53 +++++++++++ core/device_hooks/common_kernels.inc.cpp | 10 +- core/matrix/batch_ell.cpp | 34 ++----- core/matrix/batch_ell_kernels.hpp | 20 ++-- core/matrix/batch_struct.hpp | 95 +++++++++++++++++++ cuda/matrix/batch_ell_kernels.cu | 4 +- dpcpp/matrix/batch_ell_kernels.dp.cpp | 4 +- hip/matrix/batch_ell_kernels.hip.cpp | 4 +- include/ginkgo/core/base/types.hpp | 16 ++++ include/ginkgo/core/matrix/batch_ell.hpp | 31 +++--- omp/matrix/batch_ell_kernels.cpp | 4 +- reference/matrix/batch_ell_kernels.cpp | 4 +- reference/matrix/batch_ell_kernels.hpp.inc | 10 +- reference/matrix/batch_struct.hpp | 35 +++++++ 14 files changed, 256 insertions(+), 68 deletions(-) create mode 100644 common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc diff --git a/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc new file mode 100644 index 00000000000..263e911c31a --- /dev/null +++ b/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc @@ -0,0 +1,53 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index b685063da10..462675c15db 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -58,6 +58,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/factorization/par_ilu_kernels.hpp" #include "core/factorization/par_ilut_kernels.hpp" #include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_ell_kernels.hpp" #include "core/matrix/coo_kernels.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/matrix/dense_kernels.hpp" @@ -137,6 +138,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
_macro(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(_macro) +#define GKO_STUB_VALUE_AND_INT32_TYPE(_macro) \ + template \ + _macro(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ + GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(_macro) + #define GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE(_macro) \ template \ @@ -313,8 +319,8 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); namespace batch_ell { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); +GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); +GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); } // namespace batch_ell diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 63d4f0dda8a..3aea6e1aae4 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -64,24 +64,6 @@ GKO_REGISTER_OPERATION(advanced_apply, batch_ell::advanced_apply); } // namespace ell -namespace detail { - - -template -batch_dim<2> compute_batch_size( - const std::vector*>& matrices) -{ - auto common_size = matrices[0]->get_size(); - for (size_type i = 1; i < matrices.size(); ++i) { - GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); - } - return batch_dim<2>{matrices.size(), common_size}; -} - - -} // namespace detail - - template std::unique_ptr> Ell::create_view_for_item(size_type item_id) @@ -145,7 +127,8 @@ template std::unique_ptr> Ell::create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - int num_elems_per_row, gko::detail::const_array_view&& values, + const IndexType num_elems_per_row, + gko::detail::const_array_view&& values, gko::detail::const_array_view&& col_idxs) { // cast const-ness away, but return a const object afterwards, @@ -166,7 +149,8 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) template Ell::Ell(std::shared_ptr exec, - const batch_dim<2>& size, int num_elems_per_row) + const batch_dim<2>& size, + IndexType num_elems_per_row) : EnableBatchLinOp>(exec, size), num_elems_per_row_(num_elems_per_row), values_(exec, compute_num_elems(size, num_elems_per_row)), @@ -209,7 +193,7 @@ void Ell::apply_impl(const MultiVector* alpha, template void Ell::convert_to( - Ell>* result) const + Ell, IndexType>* result) const { result->values_ = this->values_; result->col_idxs_ = this->col_idxs_; @@ -218,16 +202,16 @@ void Ell::convert_to( } -template +template void Ell::move_to( - Ell>* result) + Ell, IndexType>* result) { this->convert_to(result); } -#define GKO_DECLARE_BATCH_ELL_MATRIX(_type) class Ell<_vtype, _itype> -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_MATRIX); +#define GKO_DECLARE_BATCH_ELL_MATRIX(ValueType) class Ell +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_ELL_MATRIX); } // namespace matrix diff --git a/core/matrix/batch_ell_kernels.hpp b/core/matrix/batch_ell_kernels.hpp index 1b1ef345ae0..d3acc582f9b 100644 --- a/core/matrix/batch_ell_kernels.hpp +++ b/core/matrix/batch_ell_kernels.hpp @@ -52,16 +52,16 @@ namespace kernels { #define GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL(_vtype, _itype) \ void simple_apply(std::shared_ptr exec, \ const batch::matrix::Ell<_vtype, _itype>* a, \ - const batch::MultiVector<_vtype, _itype>* b, \ - batch::MultiVector<_vtype, _itype>* c) - -#define GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL(_vtype, _itype) \ - void advanced_apply(std::shared_ptr exec, \ - const 
batch::MultiVector<_vtype, _itype>* alpha, \ - const batch::matrix::Ell<_vtype, _itype>* a, \ - const batch::MultiVector<_vtype, _itype>* b, \ - const batch::MultiVector<_vtype, _itype>* beta, \ - batch::MultiVector<_vtype, _itype>* c) + const batch::MultiVector<_vtype>* b, \ + batch::MultiVector<_vtype>* c) + +#define GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL(_vtype, _itype) \ + void advanced_apply(std::shared_ptr exec, \ + const batch::MultiVector<_vtype>* alpha, \ + const batch::matrix::Ell<_vtype, _itype>* a, \ + const batch::MultiVector<_vtype>* b, \ + const batch::MultiVector<_vtype>* beta, \ + batch::MultiVector<_vtype>* c) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index 0bbfde40cc9..272bb506df2 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include namespace gko { @@ -82,6 +83,53 @@ struct uniform_batch { } // namespace dense +namespace batch_ell { + + +/** + * Encapsulates one matrix from a batch of ell matrices. + */ +template +struct batch_item { + using value_type = ValueType; + using index_type = int32; + + ValueType* values; + const index_type* col_idxs; + index_type stride; + index_type num_rows; + index_type num_cols; + index_type num_stored_elems_per_row; +}; + + +/** + * A 'simple' structure to store a global uniform batch of ell matrices. + */ +template +struct uniform_batch { + using value_type = ValueType; + using index_type = int; + using entry_type = batch_item; + + ValueType* values; + const index_type* col_idxs; + size_type num_batch_items; + index_type stride; + index_type num_rows; + index_type num_cols; + index_type num_stored_elems_per_row; + + size_type get_entry_storage() const + { + return num_rows * num_stored_elems_per_row * sizeof(value_type); + } +}; + + +} // namespace batch_ell + + template GKO_ATTRIBUTES GKO_INLINE dense::batch_item to_const( const dense::batch_item& b) @@ -116,6 +164,53 @@ GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( } +template +GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item to_const( + const batch_ell::batch_item& b) +{ + return {b.values, b.col_idxs, b.stride, + b.num_rows, b.num_cols, b.num_stored_elems_per_row}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE batch_ell::uniform_batch to_const( + const batch_ell::uniform_batch& ub) +{ + return {ub.values, ub.col_idxs, ub.num_batch_items, ub.stride, + ub.num_rows, ub.num_cols, ub.num_stored_elems_per_row}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( + const batch_ell::uniform_batch& batch, const size_type batch_idx) +{ + return {batch.values + + batch_idx * batch.num_stored_elems_per_row * batch.num_rows, + batch.col_idxs + + batch_idx * batch.num_stored_elems_per_row * batch.num_rows, + batch.stride, + batch.num_rows, + batch.num_cols, + batch.num_stored_elems_per_row}; +} + +template +GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( + ValueType* const batch_values, int* const batch_col_idxs, const int stride, + const int num_rows, const int num_cols, int num_elems_per_row, + const size_type batch_idx) +{ + return {batch_values + batch_idx * num_elems_per_row * num_rows, + batch_col_idxs + batch_idx * num_elems_per_row * num_rows, + stride, + num_rows, + num_cols, + num_elems_per_row}; +} + + } // namespace matrix } // namespace batch } // namespace gko diff --git 
a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu index c41b436daed..567d863d95c 100644 --- a/cuda/matrix/batch_ell_kernels.cu +++ b/cuda/matrix/batch_ell_kernels.cu @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_ell_kernels.hpp" #include @@ -72,7 +72,7 @@ constexpr int sm_oversubscription = 4; // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" +// #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index f886b7dd790..cdcd5abd024 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -80,7 +80,7 @@ void simple_apply(std::shared_ptr exec, const batch::MultiVector* b, batch::MultiVector* x) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); @@ -92,7 +92,7 @@ void advanced_apply(std::shared_ptr exec, const batch::MultiVector* beta, batch::MultiVector* x) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp index c41b436daed..567d863d95c 100644 --- a/hip/matrix/batch_ell_kernels.hip.cpp +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_ell_kernels.hpp" #include @@ -72,7 +72,7 @@ constexpr int sm_oversubscription = 4; // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" +// #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 68b5da6e3eb..f5a75c7448e 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -531,6 +531,22 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, template _macro(double, int64) #endif +#if GINKGO_DPCPP_SINGLE_MODE +#define GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(_macro) \ + template _macro(float, int32); \ + template <> \ + _macro(double, int32) GKO_NOT_IMPLEMENTED; \ + template _macro(std::complex, int32); \ + template <> \ + _macro(std::complex, int32) GKO_NOT_IMPLEMENTED +#else +#define GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(_macro) \ + template _macro(float, int32); \ + template _macro(double, int32); \ + template _macro(std::complex, int32); \ + template _macro(std::complex, int32) +#endif + /** * Instantiates a template for each value and index type compiled by Ginkgo. 
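// Illustrative sketch of what the new macro is meant to produce, assuming the
// per-backend DefaultExecutor alias used by Ginkgo's kernel declarations: the
// non-DPCPP branch of GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE pairs each
// value type with int32 only, so applying it to
// GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL yields explicit instantiations of
// roughly this form (shown for float and double; the complex value types follow
// the same pattern, and the GINKGO_DPCPP_SINGLE_MODE branch stubs the
// double-precision variants with GKO_NOT_IMPLEMENTED):
template void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
                           const batch::matrix::Ell<float, int32>* a,
                           const batch::MultiVector<float>* b,
                           batch::MultiVector<float>* c);
template void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
                           const batch::matrix::Ell<double, int32>* a,
                           const batch::MultiVector<double>* b,
                           batch::MultiVector<double>* c);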
diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 374f1479664..af77fc1e390 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -88,7 +88,7 @@ class Ell final using EnableBatchLinOp::move_to; using value_type = ValueType; - using index_type = int32; + using index_type = IndexType; using transposed_type = Ell; using unbatch_type = gko::matrix::Ell; using absolute_type = remove_complex; @@ -170,7 +170,7 @@ class Ell final * @return the number of elements stored in each row of the ELL matrix. Same * for each batch item */ - int get_num_stored_elements_per_row() const noexcept + index_type get_num_stored_elements_per_row() const noexcept { return num_elems_per_row_; } @@ -205,7 +205,7 @@ class Ell final * * @return the pointer to the array of col_idxs */ - value_type* get_col_idxs_for_item(size_type batch_id) noexcept + index_type* get_col_idxs_for_item(size_type batch_id) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_data() + @@ -219,8 +219,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_col_idxs_for_item( - size_type batch_id) const noexcept + const index_type* get_const_col_idxs_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data() + @@ -249,8 +249,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + @@ -271,9 +271,9 @@ class Ell final * array (if it resides on the same executor as the matrix) or a copy of the * array on the correct executor. 
*/ - static std::unique_ptr> create_const( + static std::unique_ptr create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - const int num_elems_per_row, + const index_type num_elems_per_row, gko::detail::const_array_view&& values, gko::detail::const_array_view&& col_idxs); @@ -309,9 +309,10 @@ class Ell final } private: - size_type compute_num_elems(const batch_dim<2>& size, int num_elems_per_row) + size_type compute_num_elems(const batch_dim<2>& size, + IndexType num_elems_per_row) { - return size->get_common_size()[0] * num_elems_per_row; + return size.get_common_size()[0] * num_elems_per_row; } @@ -325,7 +326,7 @@ class Ell final */ Ell(std::shared_ptr exec, const batch_dim<2>& size = batch_dim<2>{}, - const int num_elems_per_row = 0); + const IndexType num_elems_per_row = 0); /** * Creates a Ell matrix from an already allocated (and initialized) @@ -345,7 +346,7 @@ class Ell final */ template Ell(std::shared_ptr exec, const batch_dim<2>& size, - const int num_elems_per_row, ValuesArray&& values, + const IndexType num_elems_per_row, ValuesArray&& values, IndicesArray&& col_idxs) : EnableBatchLinOp(exec, size), num_elems_per_row_{num_elems_per_row}, @@ -353,7 +354,7 @@ class Ell final col_idxs_{exec, std::forward(col_idxs)} { // Ensure that the value and col_idxs arrays have the correct size - auto num_elems = this->get_size()[0] * num_elems_per_row() * + auto num_elems = this->get_common_size()[0] * num_elems_per_row * this->get_num_batch_items(); GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); GKO_ENSURE_IN_BOUNDS(num_elems, col_idxs_.get_num_elems() + 1); @@ -376,7 +377,7 @@ class Ell final MultiVector* x) const; private: - int num_elems_per_row_; + index_type num_elems_per_row_; array values_; array col_idxs_; }; diff --git a/omp/matrix/batch_ell_kernels.cpp b/omp/matrix/batch_ell_kernels.cpp index 282920c05f3..20ea4614e7d 100644 --- a/omp/matrix/batch_ell_kernels.cpp +++ b/omp/matrix/batch_ell_kernels.cpp @@ -78,7 +78,7 @@ void simple_apply(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); @@ -107,7 +107,7 @@ void advanced_apply(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/reference/matrix/batch_ell_kernels.cpp b/reference/matrix/batch_ell_kernels.cpp index 1fab322dc5f..a3f69827c02 100644 --- a/reference/matrix/batch_ell_kernels.cpp +++ b/reference/matrix/batch_ell_kernels.cpp @@ -78,7 +78,7 @@ void simple_apply(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); @@ -106,7 +106,7 @@ void advanced_apply(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc index 1874d1db9f3..37370261d44 100644 --- a/reference/matrix/batch_ell_kernels.hpp.inc +++ b/reference/matrix/batch_ell_kernels.hpp.inc @@ -30,10 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -template +template inline void simple_apply_kernel( - const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::matrix::batch_ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& c) { @@ -53,11 +52,10 @@ inline void simple_apply_kernel( } -template +template inline void advanced_apply_kernel( const ValueType alpha, - const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::matrix::batch_ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const ValueType beta, const gko::batch::multi_vector::batch_item& c) diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 483d7717718..b5eacd80d18 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include "core/base/batch_struct.hpp" @@ -90,6 +91,40 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::batch_ell::uniform_batch +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {op->get_const_values(), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::batch_ell::uniform_batch get_batch_struct( + batch::matrix::Ell* const op) +{ + return {op->get_values(), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace host } // namespace kernels } // namespace gko From 251914e01044906c762db4a0e368b03a0862a089 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sat, 7 Oct 2023 17:31:07 +0200 Subject: [PATCH 387/583] Generalize and rewrite batch utils --- core/base/batch_utilities.hpp | 273 ++++++++++++++- core/matrix/batch_ell.cpp | 15 +- core/test/matrix/batch_ell.cpp | 330 +++++++++--------- .../ginkgo/core/base/batch_multi_vector.hpp | 222 +----------- include/ginkgo/core/matrix/batch_ell.hpp | 18 +- 5 files changed, 449 insertions(+), 409 deletions(-) diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index 834e89c8358..c37c0cae721 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -39,7 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include #include #include @@ -53,15 +52,18 @@ namespace gko { namespace batch { -template +template std::unique_ptr duplicate(std::shared_ptr exec, size_type num_duplications, - const OutputType* input) + const OutputType* input, + TArgs&&... 
create_args) { auto num_batch_items = input->get_num_batch_items(); - auto tmp = OutputType::create( - exec, batch_dim<2>(num_batch_items * num_duplications, - input->get_common_size())); + auto tmp = + OutputType::create(exec, + batch_dim<2>(num_batch_items * num_duplications, + input->get_common_size()), + std::forward(create_args)...); for (size_type i = 0; i < num_duplications; ++i) { for (size_type b = 0; b < num_batch_items; ++b) { @@ -74,14 +76,15 @@ std::unique_ptr duplicate(std::shared_ptr exec, } -template +template std::unique_ptr create_from_item( std::shared_ptr exec, const size_type num_duplications, - const typename OutputType::unbatch_type* input) + const typename OutputType::unbatch_type* input, TArgs&&... create_args) { auto num_batch_items = num_duplications; auto tmp = OutputType::create( - exec, batch_dim<2>(num_batch_items, input->get_size())); + exec, batch_dim<2>(num_batch_items, input->get_size()), + std::forward(create_args)...); for (size_type b = 0; b < num_batch_items; ++b) { tmp->create_view_for_item(b)->copy_from(input); @@ -91,14 +94,16 @@ std::unique_ptr create_from_item( } -template +template std::unique_ptr create_from_item( std::shared_ptr exec, - const std::vector& input) + const std::vector& input, + TArgs&&... create_args) { auto num_batch_items = input.size(); auto tmp = OutputType::create( - exec, batch_dim<2>(num_batch_items, input[0]->get_size())); + exec, batch_dim<2>(num_batch_items, input[0]->get_size()), + std::forward(create_args)...); for (size_type b = 0; b < num_batch_items; ++b) { tmp->create_view_for_item(b)->copy_from(input[b]); @@ -121,14 +126,17 @@ auto unbatch(const InputType* batch_object) } -template +template std::unique_ptr read( std::shared_ptr exec, - const std::vector>& data) + const std::vector>& data, + TArgs&&... create_args) { auto num_batch_items = data.size(); auto tmp = - OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size)); + OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size), + std::forward(create_args)...); for (size_type b = 0; b < num_batch_items; ++b) { tmp->create_view_for_item(b)->read(data[b]); @@ -154,6 +162,241 @@ std::vector> write( } +/** + * Creates and initializes a batch of single column-vectors. + * + * This function first creates a temporary MultiVector, fills it with + * passed in values, and then converts the vector to the requested type. + * + * @tparam Matrix matrix type to initialize + * (MultiVector has to implement the ConvertibleTo + * interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param vals values used to initialize the batch vector + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup MultiVector + * @ingroup mat_formats + */ +template +std::unique_ptr initialize( + std::initializer_list> + vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + using value_type = typename Matrix::value_type; + using index_type = typename Matrix::index_type; + using mat_data = gko::matrix_data; + size_type num_batch_items = vals.size(); + GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); + auto vals_begin = begin(vals); + size_type common_num_rows = vals_begin ? 
vals_begin->size() : 0; + auto common_size = dim<2>(common_num_rows, 1); + for (auto& val : vals) { + GKO_ASSERT_EQ(common_num_rows, val.size()); + } + auto b_size = batch_dim<2>(num_batch_items, common_size); + size_type batch = 0; + std::vector input_mat_data(num_batch_items, common_size); + for (const auto& b : vals) { + input_mat_data[batch].nonzeros.reserve(b.size()); + size_type idx = 0; + for (const auto& elem : b) { + if (elem != zero()) { + input_mat_data[batch].nonzeros.emplace_back(idx, 0, elem); + } + ++idx; + } + ++batch; + } + return read( + exec, input_mat_data, std::forward(create_args)...); +} + + +/** + * Creates and initializes a batch of multi-vectors. + * + * This function first creates a temporary MultiVector, fills it with + * passed in values, and then converts the vector to the requested type. + * + * @tparam Matrix matrix type to initialize + * (Dense has to implement the ConvertibleTo interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param vals values used to initialize the vector + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup MultiVector + * @ingroup mat_formats + */ +template +std::unique_ptr initialize( + std::initializer_list>> + vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + using value_type = typename Matrix::value_type; + using index_type = typename Matrix::index_type; + using mat_data = gko::matrix_data; + size_type num_batch_items = vals.size(); + GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); + auto vals_begin = begin(vals); + size_type common_num_rows = vals_begin ? vals_begin->size() : 0; + size_type common_num_cols = + vals_begin->begin() ? vals_begin->begin()->size() : 0; + auto common_size = dim<2>(common_num_rows, common_num_cols); + for (const auto& b : vals) { + auto num_rows = b.size(); + auto num_cols = begin(b)->size(); + auto b_size = dim<2>(num_rows, num_cols); + GKO_ASSERT_EQUAL_DIMENSIONS(b_size, common_size); + } + + auto b_size = batch_dim<2>(num_batch_items, common_size); + size_type batch = 0; + std::vector input_mat_data(num_batch_items, common_size); + for (const auto& b : vals) { + size_type ridx = 0; + for (const auto& row : b) { + size_type cidx = 0; + for (const auto& elem : row) { + if (elem != zero()) { + input_mat_data[batch].nonzeros.emplace_back(ridx, cidx, + elem); + } + ++cidx; + } + ++ridx; + } + ++batch; + } + return read( + exec, input_mat_data, std::forward(create_args)...); +} + + +/** + * Creates and initializes a batch single column-vector by making copies of the + * single input column vector. + * + * This function first creates a temporary batch multi-vector, fills it with + * passed in values, and then converts the vector to the requested type. + * + * @tparam Matrix matrix type to initialize + * (MultiVector has to implement the ConvertibleTo + * interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param num_vectors The number of times the input vector is to be duplicated + * @param vals values used to initialize each vector in the temp. 
batch + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup MultiVector + * @ingroup mat_formats + */ +template +std::unique_ptr initialize( + const size_type num_vectors, + std::initializer_list vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + using value_type = typename Matrix::value_type; + using index_type = typename Matrix::index_type; + using mat_data = gko::matrix_data; + size_type num_batch_items = num_vectors; + GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, + "Input data is empty"); + auto num_rows = begin(vals) ? vals.size() : 0; + auto common_size = dim<2>(num_rows, 1); + auto b_size = batch_dim<2>(num_batch_items, common_size); + std::vector input_mat_data(num_batch_items, common_size); + for (size_type batch = 0; batch < num_vectors; batch++) { + input_mat_data[batch].nonzeros.reserve(num_rows); + size_type idx = 0; + for (const auto& elem : vals) { + if (elem != zero()) { + input_mat_data[batch].nonzeros.emplace_back(idx, 0, elem); + } + ++idx; + } + } + return read( + exec, input_mat_data, std::forward(create_args)...); +} + + +/** + * Creates and initializes a matrix from copies of a given matrix. + * + * This function first creates a temporary batch multi-vector, fills it with + * passed in values, and then converts the vector to the requested type. + * + * @tparam Matrix matrix type to initialize + * (MultiVector has to implement the ConvertibleTo + * interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param num_batch_items The number of times the input matrix is duplicated + * @param vals values used to initialize each vector in the temp. batch + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup LinOp + * @ingroup mat_formats + */ +template +std::unique_ptr initialize( + const size_type num_batch_items, + std::initializer_list> + vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + using value_type = typename Matrix::value_type; + using index_type = typename Matrix::index_type; + using mat_data = gko::matrix_data; + GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, + "Input data is empty"); + auto common_size = dim<2>(begin(vals) ? vals.size() : 0, + begin(vals) ? 
begin(vals)->size() : 0); + batch_dim<2> b_size(num_batch_items, common_size); + std::vector input_mat_data(num_batch_items, common_size); + for (size_type batch = 0; batch < num_batch_items; batch++) { + size_type ridx = 0; + for (const auto& row : vals) { + size_type cidx = 0; + for (const auto& elem : row) { + if (elem != zero()) { + input_mat_data[batch].nonzeros.emplace_back(ridx, cidx, + elem); + } + ++cidx; + } + ++ridx; + } + } + return read( + exec, input_mat_data, std::forward(create_args)...); +} + + } // namespace batch } // namespace gko diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 3aea6e1aae4..0d903b10968 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -70,13 +70,13 @@ Ell::create_view_for_item(size_type item_id) { auto exec = this->get_executor(); auto num_rows = this->get_common_size()[0]; - auto stride = this->get_common_size()[1]; + auto stride = this->get_common_size()[0]; auto mat = unbatch_type::create( exec, this->get_common_size(), make_array_view(exec, this->get_num_elements_per_item(), this->get_values_for_item(item_id)), make_array_view(exec, this->get_num_elements_per_item(), - this->get_col_idxs_for_item(item_id)), + this->get_col_idxs()), this->get_num_stored_elements_per_row(), stride); return mat; } @@ -88,13 +88,13 @@ Ell::create_const_view_for_item(size_type item_id) const { auto exec = this->get_executor(); auto num_rows = this->get_common_size()[0]; - auto stride = this->get_common_size()[1]; + auto stride = this->get_common_size()[0]; auto mat = unbatch_type::create_const( exec, this->get_common_size(), make_const_array_view(exec, this->get_num_elements_per_item(), this->get_const_values_for_item(item_id)), make_const_array_view(exec, this->get_num_elements_per_item(), - this->get_const_col_idxs_for_item(item_id)), + this->get_const_col_idxs()), this->get_num_stored_elements_per_row(), stride); return mat; } @@ -152,9 +152,10 @@ Ell::Ell(std::shared_ptr exec, const batch_dim<2>& size, IndexType num_elems_per_row) : EnableBatchLinOp>(exec, size), - num_elems_per_row_(num_elems_per_row), - values_(exec, compute_num_elems(size, num_elems_per_row)), - col_idxs_(exec, compute_num_elems(size, num_elems_per_row)) + num_elems_per_row_(num_elems_per_row == 0 ? 
size.get_common_size()[1] + : num_elems_per_row), + values_(exec, compute_num_elems(size, num_elems_per_row_)), + col_idxs_(exec, this->get_common_size()[0] * num_elems_per_row_) {} diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp index 931efb47d2e..2830705bf5f 100644 --- a/core/test/matrix/batch_ell.cpp +++ b/core/test/matrix/batch_ell.cpp @@ -51,6 +51,7 @@ template class Ell : public ::testing::Test { protected: using value_type = T; + using index_type = gko::int32; using EllMtx = gko::matrix::Ell; using size_type = gko::size_type; Ell() @@ -58,46 +59,71 @@ class Ell : public ::testing::Test { mtx(gko::batch::initialize>( {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, - exec)), - mvec(gko::batch::initialize>( - {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, - {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, - exec)), + exec, 3)), + sp_mtx(gko::batch::initialize>( + {{{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, + {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}}, + exec, 2)), ell_mtx(gko::initialize>( - {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)) + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec, gko::dim<2>(2, 3), 3)), + sp_ell_mtx(gko::initialize>( + {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, exec, gko::dim<2>(2, 3), 2)) {} + static void assert_equal_to_original_sparse_mtx( + const gko::batch::matrix::Ell* m) + { + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 2)); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 2); + EXPECT_EQ(m->get_const_values()[0], value_type{-1.0}); + EXPECT_EQ(m->get_const_values()[1], value_type{2.5}); + EXPECT_EQ(m->get_const_values()[2], value_type{0.0}); + EXPECT_EQ(m->get_const_values()[3], value_type{3.5}); + EXPECT_EQ(m->get_const_values()[4], value_type{1.0}); + EXPECT_EQ(m->get_const_values()[5], value_type{2.0}); + EXPECT_EQ(m->get_const_values()[6], value_type{0.0}); + EXPECT_EQ(m->get_const_values()[7], value_type{3.0}); + EXPECT_EQ(m->get_const_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_const_col_idxs()[1], index_type{1}); + EXPECT_EQ(m->get_const_col_idxs()[2], index_type{-1}); + ASSERT_EQ(m->get_const_col_idxs()[3], index_type{2}); + } static void assert_equal_to_original_mtx( - gko::batch::matrix::Ell* m) + const gko::batch::matrix::Ell* m) { ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3)); - EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); - EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{-1.5}); - EXPECT_EQ(m->at(0, 1, 1), value_type{2.5}); - ASSERT_EQ(m->at(0, 1, 2), value_type{3.5}); - EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 0, 1), value_type{2.5}); - EXPECT_EQ(m->at(1, 0, 2), value_type{3.0}); - EXPECT_EQ(m->at(1, 1, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 1, 1), value_type{2.0}); - ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 3); + EXPECT_EQ(m->get_const_values()[0], value_type{-1.0}); + EXPECT_EQ(m->get_const_values()[1], value_type{-1.5}); + EXPECT_EQ(m->get_const_values()[2], value_type{2.0}); + EXPECT_EQ(m->get_const_values()[3], value_type{2.5}); + EXPECT_EQ(m->get_const_values()[4], value_type{3.0}); + EXPECT_EQ(m->get_const_values()[5], value_type{3.5}); + EXPECT_EQ(m->get_const_values()[6], value_type{1.0}); + EXPECT_EQ(m->get_const_values()[7], value_type{1.0}); + 
EXPECT_EQ(m->get_const_values()[8], value_type{2.5}); + EXPECT_EQ(m->get_const_values()[9], value_type{2.0}); + EXPECT_EQ(m->get_const_values()[10], value_type{3.0}); + ASSERT_EQ(m->get_const_values()[11], value_type{3.0}); } static void assert_empty(gko::batch::matrix::Ell* m) { ASSERT_EQ(m->get_num_batch_items(), 0); ASSERT_EQ(m->get_num_stored_elements(), 0); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 0); } std::shared_ptr exec; std::unique_ptr> mtx; - std::unique_ptr> mvec; + std::unique_ptr> sp_mtx; std::unique_ptr> ell_mtx; + std::unique_ptr> sp_ell_mtx; }; TYPED_TEST_SUITE(Ell, gko::test::ValueTypes); @@ -109,6 +135,12 @@ TYPED_TEST(Ell, KnowsItsSizeAndValues) } +TYPED_TEST(Ell, SparseMtxKnowsItsSizeAndValues) +{ + this->assert_equal_to_original_sparse_mtx(this->sp_mtx.get()); +} + + TYPED_TEST(Ell, CanBeEmpty) { auto empty = gko::batch::matrix::Ell::create(this->exec); @@ -137,10 +169,10 @@ TYPED_TEST(Ell, CanCreateEllItemView) } -TYPED_TEST(Ell, CanCreateMultiVectorView) +TYPED_TEST(Ell, CanCreateSpEllItemView) { - GKO_ASSERT_BATCH_MTX_NEAR(this->mtx->create_multi_vector_view(), this->mvec, - 0.0); + GKO_ASSERT_MTX_NEAR(this->sp_mtx->create_view_for_item(1), this->sp_ell_mtx, + 0.0); } @@ -151,8 +183,7 @@ TYPED_TEST(Ell, CanBeCopied) mtx_copy->copy_from(this->mtx.get()); this->assert_equal_to_original_mtx(this->mtx.get()); - this->mtx->at(0, 0, 0) = 7; - this->mtx->at(0, 1) = 7; + this->mtx->get_values()[0] = 7; this->assert_equal_to_original_mtx(mtx_copy.get()); } @@ -189,71 +220,62 @@ TYPED_TEST(Ell, CanBeConstructedWithSize) using size_type = gko::size_type; auto m = gko::batch::matrix::Ell::create( - this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3})); + this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3}), 2); ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); - ASSERT_EQ(m->get_num_stored_elements(), 30); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 2); + ASSERT_EQ(m->get_num_stored_elements(), 20); } TYPED_TEST(Ell, CanBeConstructedFromExistingData) { using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; using size_type = gko::size_type; // clang-format off - value_type data[] = { + value_type values[] = { + -1.0, 2.5, + 0.0, 3.5, 1.0, 2.0, - -1.0, 3.0, - 4.0, -1.0, - 3.0, 5.0, - 1.0, 5.0, - 6.0, -3.0}; + 0.0, 3.0}; + index_type col_idxs[] = { + 0, 1, + -1, 2}; // clang-format on auto m = gko::batch::matrix::Ell::create( - this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), - gko::array::view(this->exec, 8, data)); - - ASSERT_EQ(m->get_const_values(), data); - ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); - ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); - ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); - ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); - ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); - ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); - ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2, + gko::array::view(this->exec, 8, values), + gko::array::view(this->exec, 4, col_idxs)); + + this->assert_equal_to_original_sparse_mtx(m.get()); } TYPED_TEST(Ell, CanBeConstructedFromExistingConstData) { using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; using size_type = gko::size_type; // clang-format off - const value_type data[] = { + value_type values[] = { + -1.0, 2.5, + 0.0, 3.5, 1.0, 2.0, - -1.0, 3.0, - 4.0, -1.0, - 3.0, 5.0, - 
1.0, 5.0, - 6.0, -3.0}; + 0.0, 3.0}; + index_type col_idxs[] = { + 0, 1, + -1, 2}; // clang-format on auto m = gko::batch::matrix::Ell::create_const( - this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), - gko::array::const_view(this->exec, 8, data)); - - ASSERT_EQ(m->get_const_values(), data); - ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); - ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); - ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); - ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); - ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); - ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); - ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2, + gko::array::const_view(this->exec, 8, values), + gko::array::const_view(this->exec, 4, col_idxs)); + + this->assert_equal_to_original_sparse_mtx(m.get()); } @@ -263,35 +285,36 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatrices) using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, this->exec); auto mat2 = - gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, this->exec); auto m = gko::batch::create_from_item>( - this->exec, std::vector{mat1.get(), mat2.get()}); + this->exec, std::vector{mat1.get(), mat2.get()}, + mat1->get_num_stored_elements_per_row()); - this->assert_equal_to_original_mtx(m.get()); + this->assert_equal_to_original_sparse_mtx(m.get()); } TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) { using value_type = typename TestFixture::value_type; + using index_type = int; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize(4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, - this->exec); - auto mat2 = - gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + auto mat1 = + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); auto bat_m = gko::batch::create_from_item>( this->exec, - std::vector{mat1.get(), mat1.get(), mat1.get()}); + std::vector{mat1.get(), mat1.get(), mat1.get()}, + mat1->get_num_stored_elements_per_row()); auto m = gko::batch::create_from_item>( - this->exec, 3, mat1.get()); + this->exec, 3, mat1.get(), mat1->get_num_stored_elements_per_row()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } @@ -300,24 +323,27 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) { using value_type = typename TestFixture::value_type; + using index_type = int; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 0.0}}, this->exec); auto mat2 = - gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); auto m = gko::batch::create_from_item>( - this->exec, std::vector{mat1.get(), mat2.get()}); + this->exec, std::vector{mat1.get(), mat2.get()}, + mat1->get_num_stored_elements_per_row()); auto m_ref = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), - mat1.get(), mat2.get()}); + mat1.get(), mat2.get()}, + mat1->get_num_stored_elements_per_row()); auto m2 = gko::batch::duplicate>( - this->exec, 3, m.get()); + 
this->exec, 3, m.get(), mat1->get_num_stored_elements_per_row()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } @@ -326,15 +352,16 @@ TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices) { using value_type = typename TestFixture::value_type; + using index_type = int; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize(4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, this->exec); auto mat2 = - gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, this->exec); auto ell_mats = gko::batch::unbatch>( - this->mtx.get()); + this->sp_mtx.get()); GKO_ASSERT_MTX_NEAR(ell_mats[0].get(), mat1.get(), 0.); GKO_ASSERT_MTX_NEAR(ell_mats[1].get(), mat2.get(), 0.); @@ -344,55 +371,83 @@ TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices) TYPED_TEST(Ell, CanBeListConstructed) { using value_type = typename TestFixture::value_type; + using index_type = int; auto m = gko::batch::initialize>( - {{1.0, 2.0}, {1.0, 3.0}}, this->exec); + {{0.0, -1.0}, {1.0, 0.0}}, this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); - EXPECT_EQ(m->at(0, 0), value_type{1}); - EXPECT_EQ(m->at(0, 1), value_type{2}); - EXPECT_EQ(m->at(1, 0), value_type{1}); - EXPECT_EQ(m->at(1, 1), value_type{3}); + ASSERT_EQ(m->get_num_stored_elements(), 4); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 1); + EXPECT_EQ(m->get_values()[0], value_type{0.0}); + EXPECT_EQ(m->get_values()[1], value_type{-1.0}); + EXPECT_EQ(m->get_values()[2], value_type{1.0}); + EXPECT_EQ(m->get_values()[3], value_type{0.0}); + EXPECT_EQ(m->get_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_col_idxs()[1], index_type{-1}); } TYPED_TEST(Ell, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; + using index_type = int; auto m = gko::batch::initialize>( - 2, I({1.0, 2.0}), this->exec); + 2, I({0.0, -1.0}), this->exec, 1); ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); - EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 0, 1), value_type{2.0}); + ASSERT_EQ(m->get_num_stored_elements(), 4); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 1); + EXPECT_EQ(m->get_values()[0], value_type{0.0}); + EXPECT_EQ(m->get_values()[1], value_type{-1.0}); + EXPECT_EQ(m->get_values()[2], value_type{0.0}); + EXPECT_EQ(m->get_values()[3], value_type{-1.0}); + EXPECT_EQ(m->get_col_idxs()[0], index_type{-1}); + EXPECT_EQ(m->get_col_idxs()[1], index_type{0}); } TYPED_TEST(Ell, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; + using index_type = int; using T = value_type; auto m = gko::batch::initialize>( - {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, - {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, - this->exec); + // clang-format off + {{I{1.0, 0.0, 0.0}, + I{2.0, 0.0, 3.0}, + I{3.0, 6.0, 0.0}}, + {I{1.0, 0.0, 0.0}, + I{3.0, 0.0, -2.0}, + I{5.0, 8.0, 0.0}}}, + // clang-format on + this->exec, 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 3)); - EXPECT_EQ(m->at(0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1), value_type{1.0}); - EXPECT_EQ(m->at(0, 2), value_type{0.0}); - ASSERT_EQ(m->at(0, 3), value_type{2.0}); - 
EXPECT_EQ(m->at(0, 4), value_type{4.0}); - EXPECT_EQ(m->at(1, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 1), value_type{2.0}); - EXPECT_EQ(m->at(1, 2), value_type{-1.0}); - ASSERT_EQ(m->at(1, 3), value_type{3.0}); - EXPECT_EQ(m->at(1, 4), value_type{4.0}); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3)); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 2); + EXPECT_EQ(m->get_values()[0], value_type{1.0}); + EXPECT_EQ(m->get_values()[1], value_type{2.0}); + EXPECT_EQ(m->get_values()[2], value_type{3.0}); + EXPECT_EQ(m->get_values()[3], value_type{0.0}); + EXPECT_EQ(m->get_values()[4], value_type{3.0}); + EXPECT_EQ(m->get_values()[5], value_type{6.0}); + EXPECT_EQ(m->get_values()[6], value_type{1.0}); + EXPECT_EQ(m->get_values()[7], value_type{3.0}); + EXPECT_EQ(m->get_values()[8], value_type{5.0}); + EXPECT_EQ(m->get_values()[9], value_type{0.0}); + EXPECT_EQ(m->get_values()[10], value_type{-2.0}); + EXPECT_EQ(m->get_values()[11], value_type{8.0}); + EXPECT_EQ(m->get_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_col_idxs()[1], index_type{0}); + EXPECT_EQ(m->get_col_idxs()[2], index_type{0}); + EXPECT_EQ(m->get_col_idxs()[3], index_type{-1}); + EXPECT_EQ(m->get_col_idxs()[4], index_type{2}); + EXPECT_EQ(m->get_col_idxs()[5], index_type{1}); } @@ -400,52 +455,17 @@ TYPED_TEST(Ell, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; using index_type = int; - auto vec_data = std::vector>{}; vec_data.emplace_back(gko::matrix_data( - {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 0, 0.0}, {1, 1, 5.0}})); + {2, 3}, {{0, 0, -1.0}, {1, 1, 2.5}, {1, 2, 3.5}})); vec_data.emplace_back(gko::matrix_data( - {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); + {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}})); auto m = gko::batch::read>(this->exec, - vec_data); - - ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); - EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); - EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); -} - - -TYPED_TEST(Ell, CanBeReadFromSparseMatrixData) -{ - using value_type = typename TestFixture::value_type; - using index_type = int; - auto vec_data = std::vector>{}; - vec_data.emplace_back(gko::matrix_data( - {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 1, 5.0}})); - vec_data.emplace_back(gko::matrix_data( - {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); + vec_data, 2); - auto m = gko::batch::read>(this->exec, - vec_data); - - ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); - EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); - EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); + this->assert_equal_to_original_sparse_mtx(m.get()); } @@ -455,24 +475,18 @@ TYPED_TEST(Ell, GeneratesCorrectMatrixData) using index_type = int; using tpl = typename gko::matrix_data::nonzero_type; - auto data = - gko::batch::write>(this->mtx.get()); + auto data = gko::batch::write>( + this->sp_mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); - ASSERT_EQ(data[0].nonzeros.size(), 6); + ASSERT_EQ(data[0].nonzeros.size(), 3); 
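// Illustrative usage sketch of the generalized batch utilities these tests
// exercise, assuming a reference executor and <ginkgo/ginkgo.hpp>: the
// trailing argument of gko::batch::initialize is forwarded to Ell::create as
// num_elems_per_row, so the sparsity pattern shared by all batch items needs
// only two stored entries per row here.
{
    auto exec = gko::ReferenceExecutor::create();
    auto mtx = gko::batch::initialize<
        gko::batch::matrix::Ell<double, gko::int32>>(
        {{{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}},
         {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}},
        exec, 2);
    // each batch item can then be viewed as a regular gko::matrix::Ell
    auto item = mtx->create_const_view_for_item(1);
}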
EXPECT_EQ(data[0].nonzeros[0], tpl(0, 0, value_type{-1.0})); - EXPECT_EQ(data[0].nonzeros[1], tpl(0, 1, value_type{2.0})); - EXPECT_EQ(data[0].nonzeros[2], tpl(0, 2, value_type{3.0})); - EXPECT_EQ(data[0].nonzeros[3], tpl(1, 0, value_type{-1.5})); - EXPECT_EQ(data[0].nonzeros[4], tpl(1, 1, value_type{2.5})); - EXPECT_EQ(data[0].nonzeros[5], tpl(1, 2, value_type{3.5})); + EXPECT_EQ(data[0].nonzeros[1], tpl(1, 1, value_type{2.5})); + EXPECT_EQ(data[0].nonzeros[2], tpl(1, 2, value_type{3.5})); ASSERT_EQ(data[1].size, gko::dim<2>(2, 3)); - ASSERT_EQ(data[1].nonzeros.size(), 6); + ASSERT_EQ(data[1].nonzeros.size(), 3); EXPECT_EQ(data[1].nonzeros[0], tpl(0, 0, value_type{1.0})); - EXPECT_EQ(data[1].nonzeros[1], tpl(0, 1, value_type{2.5})); - EXPECT_EQ(data[1].nonzeros[2], tpl(0, 2, value_type{3.0})); - EXPECT_EQ(data[1].nonzeros[3], tpl(1, 0, value_type{1.0})); - EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0})); - EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0})); + EXPECT_EQ(data[1].nonzeros[1], tpl(1, 1, value_type{2.0})); + EXPECT_EQ(data[1].nonzeros[2], tpl(1, 2, value_type{3.0})); } diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 61dffba3193..45ba0686468 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -212,8 +212,8 @@ class MultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); @@ -462,224 +462,6 @@ class MultiVector }; -/** - * Creates and initializes a batch of single column-vectors. - * - * This function first creates a temporary MultiVector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param vals values used to initialize the batch vector - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup MultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - std::initializer_list> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - using batch_multi_vector = MultiVector; - size_type num_batch_items = vals.size(); - GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); - auto vals_begin = begin(vals); - size_type common_num_rows = vals_begin ? 
vals_begin->size() : 0; - auto common_size = dim<2>(common_num_rows, 1); - for (auto& val : vals) { - GKO_ASSERT_EQ(common_num_rows, val.size()); - } - auto b_size = batch_dim<2>(num_batch_items, common_size); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - size_type batch = 0; - for (const auto& b : vals) { - size_type idx = 0; - for (const auto& elem : b) { - tmp->at(batch, idx) = elem; - ++idx; - } - ++batch; - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - -/** - * Creates and initializes a batch of multi-vectors. - * - * This function first creates a temporary MultiVector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param vals values used to initialize the vector - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup MultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - std::initializer_list>> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - using batch_multi_vector = MultiVector; - size_type num_batch_items = vals.size(); - GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); - auto vals_begin = begin(vals); - size_type common_num_rows = vals_begin ? vals_begin->size() : 0; - size_type common_num_cols = - vals_begin->begin() ? vals_begin->begin()->size() : 0; - auto common_size = dim<2>(common_num_rows, common_num_cols); - for (const auto& b : vals) { - auto num_rows = b.size(); - auto num_cols = begin(b)->size(); - auto b_size = dim<2>(num_rows, num_cols); - GKO_ASSERT_EQUAL_DIMENSIONS(b_size, common_size); - } - - auto b_size = batch_dim<2>(num_batch_items, common_size); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - size_type batch = 0; - for (const auto& b : vals) { - size_type ridx = 0; - for (const auto& row : b) { - size_type cidx = 0; - for (const auto& elem : row) { - tmp->at(batch, ridx, cidx) = elem; - ++cidx; - } - ++ridx; - } - ++batch; - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - -/** - * Creates and initializes a batch single column-vector by making copies of the - * single input column vector. - * - * This function first creates a temporary batch multi-vector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param num_vectors The number of times the input vector is to be duplicated - * @param vals values used to initialize each vector in the temp. batch - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup MultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - const size_type num_vectors, - std::initializer_list vals, - std::shared_ptr exec, TArgs&&... 
create_args) -{ - using batch_multi_vector = MultiVector; - size_type num_batch_items = num_vectors; - GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, - "Input data is empty"); - auto b_size = - batch_dim<2>(num_batch_items, dim<2>(begin(vals) ? vals.size() : 0, 1)); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - for (size_type batch = 0; batch < num_vectors; batch++) { - size_type idx = 0; - for (const auto& elem : vals) { - tmp->at(batch, idx) = elem; - ++idx; - } - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - -/** - * Creates and initializes a matrix from copies of a given matrix. - * - * This function first creates a temporary batch multi-vector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param num_batch_items The number of times the input matrix is duplicated - * @param vals values used to initialize each vector in the temp. batch - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup LinOp - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - const size_type num_batch_items, - std::initializer_list> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - using batch_multi_vector = MultiVector; - GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, - "Input data is empty"); - auto common_size = dim<2>(begin(vals) ? vals.size() : 0, - begin(vals) ? begin(vals)->size() : 0); - batch_dim<2> b_size(num_batch_items, common_size); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - for (size_type batch = 0; batch < num_batch_items; batch++) { - size_type ridx = 0; - for (const auto& row : vals) { - size_type cidx = 0; - for (const auto& elem : row) { - tmp->at(batch, ridx, cidx) = elem; - ++cidx; - } - ++ridx; - } - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - } // namespace batch } // namespace gko diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index af77fc1e390..490f7a7d4b0 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -198,8 +198,8 @@ class Ell final } /** - * Returns a pointer to the array of col_idxs of the matrix for a - * specific batch item. + * Returns a pointer to the array of col_idxs of the matrix. This is shared + * across all batch items. * * @param batch_id the id of the batch item. 
* @@ -208,8 +208,7 @@ class Ell final index_type* get_col_idxs_for_item(size_type batch_id) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return col_idxs_.get_data() + - batch_id * this->get_num_elements_per_item(); + return col_idxs_.get_data(); } /** @@ -223,8 +222,7 @@ class Ell final noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return col_idxs_.get_const_data() + - batch_id * this->get_num_elements_per_item(); + return col_idxs_.get_const_data(); } /** @@ -312,7 +310,8 @@ class Ell final size_type compute_num_elems(const batch_dim<2>& size, IndexType num_elems_per_row) { - return size.get_common_size()[0] * num_elems_per_row; + return size.get_num_batch_items() * size.get_common_size()[0] * + num_elems_per_row; } @@ -356,8 +355,9 @@ class Ell final // Ensure that the value and col_idxs arrays have the correct size auto num_elems = this->get_common_size()[0] * num_elems_per_row * this->get_num_batch_items(); - GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); - GKO_ENSURE_IN_BOUNDS(num_elems, col_idxs_.get_num_elems() + 1); + GKO_ASSERT_EQ(num_elems, values_.get_num_elems()); + GKO_ASSERT_EQ(this->get_num_elements_per_item(), + col_idxs_.get_num_elems()); } /** From 3622b20f1d318cf009d38dd409a6cc5b241a8cf9 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 8 Oct 2023 10:26:50 +0200 Subject: [PATCH 388/583] Add OMP, CUDA, HIP kernels and tests Co-authored-by: Aditya Kashi --- .../matrix/batch_ell_kernel_launcher.hpp.inc | 29 +- .../cuda_hip/matrix/batch_ell_kernels.hpp.inc | 155 +++++++++++ core/matrix/batch_struct.hpp | 5 +- cuda/matrix/batch_ell_kernels.cu | 2 +- cuda/matrix/batch_struct.hpp | 34 +++ hip/matrix/batch_dense_kernels.hip.cpp | 1 - hip/matrix/batch_ell_kernels.hip.cpp | 27 +- hip/matrix/batch_struct.hip.hpp | 34 +++ reference/matrix/batch_ell_kernels.hpp.inc | 6 +- reference/matrix/batch_struct.hpp | 4 +- reference/test/matrix/CMakeLists.txt | 1 + reference/test/matrix/batch_ell_kernels.cpp | 248 ++++++++++++++++++ test/matrix/CMakeLists.txt | 1 + test/matrix/batch_ell_kernels.cpp | 128 +++++++++ 14 files changed, 650 insertions(+), 25 deletions(-) create mode 100644 common/cuda_hip/matrix/batch_ell_kernels.hpp.inc create mode 100644 reference/test/matrix/batch_ell_kernels.cpp create mode 100644 test/matrix/batch_ell_kernels.cpp diff --git a/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc index 263e911c31a..f8da432aa4d 100644 --- a/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc +++ b/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc @@ -34,7 +34,18 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::Ell* mat, const batch::MultiVector* b, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + simple_apply_kernel<<get_stream()>>>(mat_ub, b_ub, x_ub); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( @@ -47,7 +58,21 @@ void advanced_apply(std::shared_ptr exec, const batch::matrix::Ell* mat, const batch::MultiVector* b, const batch::MultiVector* beta, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); 
+ const auto mat_ub = get_batch_struct(mat); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + advanced_apply_kernel<<get_stream()>>>(alpha_ub, mat_ub, b_ub, + beta_ub, x_ub); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc new file mode 100644 index 00000000000..e55e7a60471 --- /dev/null +++ b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc @@ -0,0 +1,155 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +template +__device__ __forceinline__ void simple_apply( + const gko::batch::matrix::batch_ell::batch_item& mat, + const ValueType* const __restrict__ b, ValueType* const __restrict__ x) +{ + const auto num_rows = mat.num_rows; + const auto num_stored_elements_per_row = mat.num_stored_elems_per_row; + const auto stride = mat.stride; + const auto val = mat.values; + const auto col = mat.col_idxs; + for (int tidx = threadIdx.x; tidx < num_rows; tidx += blockDim.x) { + auto temp = zero(); + for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { + const auto ind = tidx + idx * stride; + const auto col_idx = col[ind]; + if (col_idx < idx) { + break; + } else { + temp += val[ind] * b[col_idx]; + } + } + x[tidx] = temp; + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: + batch_ell::uniform_batch< + const ValueType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + simple_apply(mat_b, b_b.values, x_b.values); + } +} + + +template +__device__ __forceinline__ void advanced_apply( + const ValueType alpha, + const gko::batch::matrix::batch_ell::batch_item& mat, + const ValueType* const __restrict__ b, const ValueType beta, + ValueType* const __restrict__ x) +{ + const auto num_rows = mat.num_rows; + const auto num_stored_elements_per_row = mat.num_stored_elems_per_row; + const auto stride = mat.stride; + const auto val = mat.values; + const auto col = mat.col_idxs; + for (int tidx = threadIdx.x; tidx < num_rows; tidx += blockDim.x) { + auto temp = zero(); + for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { + const auto ind = tidx + idx * stride; + const auto col_idx = col[ind]; + if (col_idx < idx) { + break; + } else { + temp += alpha * val[ind] * b[col_idx]; + } + } + x[tidx] = temp + beta * x[tidx]; + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void advanced_apply_kernel(const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + alpha, + const gko::batch::matrix:: + batch_ell:: + uniform_batch< + const ValueType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + beta, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id); + const auto beta_b = gko::batch::extract_batch_item(beta, batch_id); + advanced_apply(alpha_b.values[0], mat_b, b_b.values, beta_b.values[0], + x_b.values); + } +} diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index 272bb506df2..2eed40882bc 100644 --- a/core/matrix/batch_struct.hpp +++ 
b/core/matrix/batch_struct.hpp @@ -188,8 +188,7 @@ GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( { return {batch.values + batch_idx * batch.num_stored_elems_per_row * batch.num_rows, - batch.col_idxs + - batch_idx * batch.num_stored_elems_per_row * batch.num_rows, + batch.col_idxs, batch.stride, batch.num_rows, batch.num_cols, @@ -203,7 +202,7 @@ GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( const size_type batch_idx) { return {batch_values + batch_idx * num_elems_per_row * num_rows, - batch_col_idxs + batch_idx * num_elems_per_row * num_rows, + batch_col_idxs, stride, num_rows, num_cols, diff --git a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu index 567d863d95c..ee6a99f04ca 100644 --- a/cuda/matrix/batch_ell_kernels.cu +++ b/cuda/matrix/batch_ell_kernels.cu @@ -72,7 +72,7 @@ constexpr int sm_oversubscription = 4; // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -// #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" +#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 73712a7b81b..7a6a4ac7f00 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -87,6 +87,40 @@ get_batch_struct(batch::matrix::Dense* const op) } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::batch_ell::uniform_batch> +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {as_cuda_type(op->get_const_values()), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::batch_ell::uniform_batch> +get_batch_struct(batch::matrix::Ell* const op) +{ + return {as_cuda_type(op->get_values()), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace cuda } // namespace kernels } // namespace gko diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index eb3da83760a..3361feeb8b8 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include "core/base/batch_struct.hpp" diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp index 567d863d95c..fdd52c38f57 100644 --- a/hip/matrix/batch_ell_kernels.hip.cpp +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_ell_kernels.hpp" +#include #include #include @@ -42,21 +43,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" -#include "cuda/base/batch_struct.hpp" -#include "cuda/base/config.hpp" -#include "cuda/base/cublas_bindings.hpp" -#include "cuda/base/pointer_mode_guard.hpp" -#include "cuda/base/thrust.cuh" -#include "cuda/components/cooperative_groups.cuh" -#include "cuda/components/reduction.cuh" -#include "cuda/components/thread_ids.cuh" -#include "cuda/components/uninitialized_array.hpp" -#include "cuda/matrix/batch_struct.hpp" +#include "hip/base/batch_struct.hip.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/hipblas_bindings.hip.hpp" +#include "hip/base/pointer_mode_guard.hip.hpp" +#include "hip/base/thrust.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" +#include "hip/components/uninitialized_array.hip.hpp" +#include "hip/matrix/batch_struct.hip.hpp" namespace gko { namespace kernels { -namespace cuda { +namespace hip { /** * @brief The Ell matrix format namespace. * @ref Ell @@ -72,7 +73,7 @@ constexpr int sm_oversubscription = 4; // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -// #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" +#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" @@ -81,6 +82,6 @@ constexpr int sm_oversubscription = 4; } // namespace batch_ell -} // namespace cuda +} // namespace hip } // namespace kernels } // namespace gko diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index 4670cf0988b..a43d7d058b0 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -87,6 +87,40 @@ get_batch_struct(batch::matrix::Dense* const op) } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::batch_ell::uniform_batch> +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {as_hip_type(op->get_const_values()), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. 
+ */ +template +inline batch::matrix::batch_ell::uniform_batch> +get_batch_struct(batch::matrix::Ell* const op) +{ + return {as_hip_type(op->get_values()), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace hip } // namespace kernels } // namespace gko diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc index 37370261d44..41d0a00ddcd 100644 --- a/reference/matrix/batch_ell_kernels.hpp.inc +++ b/reference/matrix/batch_ell_kernels.hpp.inc @@ -36,14 +36,14 @@ inline void simple_apply_kernel( const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& c) { - for (int row = 0; row < a.num_rows; ++row) { - for (int j = 0; j < b.num_rhs; ++j) { + for (int row = 0; row < c.num_rows; ++row) { + for (int j = 0; j < c.num_rhs; ++j) { c.values[row * c.stride + j] = zero(); } for (auto k = 0; k < a.num_stored_elems_per_row; ++k) { auto val = a.values[row + k * a.stride]; auto col = a.col_idxs[row + k * a.stride]; - for (int j = 0; j < b.num_rhs; ++j) { + for (int j = 0; j < c.num_rhs; ++j) { c.values[row * c.stride + j] += val * b.values[col * b.stride + j]; } diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index b5eacd80d18..3b562450ee0 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -101,7 +101,7 @@ get_batch_struct(const batch::matrix::Ell* const op) return {op->get_const_values(), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1]), static_cast(op->get_num_stored_elements_per_row())}; @@ -118,7 +118,7 @@ inline batch::matrix::batch_ell::uniform_batch get_batch_struct( return {op->get_values(), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1]), static_cast(op->get_num_stored_elements_per_row())}; diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 18634de662d..05498cbadc4 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_test(batch_dense_kernels) +ginkgo_create_test(batch_ell_kernels) ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) diff --git a/reference/test/matrix/batch_ell_kernels.cpp b/reference/test/matrix/batch_ell_kernels.cpp new file mode 100644 index 00000000000..76b681c69f7 --- /dev/null +++ b/reference/test/matrix/batch_ell_kernels.cpp @@ -0,0 +1,248 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+ +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_ell_kernels.hpp" +#include "core/test/utils.hpp" + + +template +class Ell : public ::testing::Test { +protected: + using value_type = T; + using size_type = gko::size_type; + using Mtx = gko::batch::matrix::Ell; + using MVec = gko::batch::MultiVector; + using EllMtx = gko::matrix::Ell; + using DenseMtx = gko::matrix::Dense; + using ComplexMtx = gko::to_complex; + using RealMtx = gko::remove_complex; + Ell() + : exec(gko::ReferenceExecutor::create()), + mtx_0(gko::batch::initialize( + {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, + {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, + exec)), + mtx_00(gko::initialize( + {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), + mtx_01(gko::initialize( + {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), + b_0(gko::batch::initialize( + {{I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}}, + exec)), + b_00(gko::initialize( + {I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + b_01(gko::initialize( + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + x_0(gko::batch::initialize( + {{I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}}, + exec)), + x_00(gko::initialize( + {I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, exec)), + x_01(gko::initialize( + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}, exec)) + {} + + std::shared_ptr exec; + std::unique_ptr mtx_0; + std::unique_ptr mtx_00; + std::unique_ptr mtx_01; + std::unique_ptr b_0; + std::unique_ptr b_00; + std::unique_ptr b_01; + std::unique_ptr x_0; + std::unique_ptr x_00; + std::unique_ptr x_01; + + std::ranlux48 rand_engine; +}; + + +TYPED_TEST_SUITE(Ell, gko::test::ValueTypes); + + +TYPED_TEST(Ell, AppliesToBatchMultiVector) +{ + using T = typename TestFixture::value_type; + + this->mtx_0->apply(this->b_0.get(), this->x_0.get()); + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); + this->mtx_01->apply(this->b_01.get(), this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(Ell, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) +{ + using Mtx = typename TestFixture::Mtx; + using MVec = typename TestFixture::MVec; + 
using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize(2, {1.5}, this->exec); + auto beta = gko::batch::initialize(2, {-4.0}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({1.5}, this->exec); + auto beta0 = gko::initialize({-4.0}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), + this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) +{ + using Mtx = typename TestFixture::Mtx; + using MVec = typename TestFixture::MVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({-1.0}, this->exec); + auto beta0 = gko::initialize({2.5}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), + this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultCols) +{ + using MVec = typename TestFixture::MVec; + auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultRows) +{ + using MVec = typename TestFixture::MVec; + auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Ell, ApplyFailsOnWrongInnerDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + + ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Ell, AdvancedApplyFailsOnWrongInnerDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + auto alpha = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + auto beta = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Ell, AdvancedApplyFailsOnWrongAlphaDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); + auto alpha = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); + auto beta = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 
1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index 9f3b17cd858..f1c91e615e7 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_common_test(batch_dense_kernels) +ginkgo_create_common_test(batch_ell_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) diff --git a/test/matrix/batch_ell_kernels.cpp b/test/matrix/batch_ell_kernels.cpp new file mode 100644 index 00000000000..9629a2263ff --- /dev/null +++ b/test/matrix/batch_ell_kernels.cpp @@ -0,0 +1,128 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/batch_utilities.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/assertions.hpp" +#include "core/test/utils/batch_helpers.hpp" +#include "test/utils/executor.hpp" + + +class Ell : public CommonTestFixture { +protected: + using Mtx = gko::batch::matrix::Ell; + using MVec = gko::batch::MultiVector; + + Ell() : rand_engine(15) {} + + template + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) + { + return gko::test::generate_random_batch_matrix( + num_batch_items, num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(gko::size_type num_vecs = 1) + { + const int num_rows = 252; + const int num_cols = 32; + x = gen_mtx(batch_size, num_rows, num_cols); + y = gen_mtx(batch_size, num_cols, num_vecs); + alpha = gen_mtx(batch_size, 1, 1); + beta = gen_mtx(batch_size, 1, 1); + dx = gko::clone(exec, x); + dy = gko::clone(exec, y); + dalpha = gko::clone(exec, alpha); + dbeta = gko::clone(exec, beta); + expected = MVec::create( + ref, + gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); + expected->fill(gko::one()); + dresult = gko::clone(exec, expected); + } + + std::ranlux48 rand_engine; + + const size_t batch_size = 11; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr dresult; + std::unique_ptr dx; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(Ell, SingleVectorApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + x->apply(y.get(), expected.get()); + dx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, SingleVectorAdvancedApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + x->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); +} From 301fa6ea283f4a8b5e654d1cdcc1ecda2d8ed859 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 8 Oct 2023 12:03:36 +0200 Subject: [PATCH 389/583] Add DPCPP kernels and tests Co-authored-by: Phuong Nguyen --- core/test/utils/batch_helpers.hpp | 2 - dpcpp/matrix/batch_ell_kernels.dp.cpp | 84 ++++++++++++++++++++++++-- dpcpp/matrix/batch_ell_kernels.hpp.inc | 79 ++++++++++++++++++++++++ dpcpp/matrix/batch_struct.hpp | 34 +++++++++++ test/matrix/CMakeLists.txt | 2 +- test/matrix/batch_ell_kernels.cpp | 26 ++++++-- 6 files changed, 213 insertions(+), 14 deletions(-) create mode 100644 dpcpp/matrix/batch_ell_kernels.hpp.inc diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp index 4cf9d4973e2..b040691999e 100644 --- a/core/test/utils/batch_helpers.hpp +++ b/core/test/utils/batch_helpers.hpp @@ -83,8 +83,6 @@ std::unique_ptr generate_random_batch_matrix( exec, batch_dim<2>(num_batch_items, dim<2>(num_rows, num_cols)), std::forward(args)...); - // TODO: Need to preserve sparsity pattern across batch items for batched - // sparse matrix formats for (size_type b = 0; b < num_batch_items; b++) { auto rand_mat = generate_random_matrix( diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp 
b/dpcpp/matrix/batch_ell_kernels.dp.cpp index cdcd5abd024..1ed83d79630 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_ell_kernels.hpp" #include @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include +#include #include "core/base/batch_struct.hpp" @@ -71,14 +71,48 @@ namespace dpcpp { namespace batch_ell { -// #include "dpcpp/matrix/batch_dense_kernels.hpp.inc" +#include "dpcpp/matrix/batch_ell_kernels.hpp.inc" template void simple_apply(std::shared_ptr exec, const batch::matrix::Ell* mat, const batch::MultiVector* b, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const size_type num_rows = mat->get_common_size()[0]; + const size_type num_cols = mat->get_common_size()[1]; + + const auto num_batch_items = mat->get_num_batch_items(); + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + const auto x_ub = get_batch_struct(x); + const auto b_ub = get_batch_struct(b); + const auto mat_ub = get_batch_struct(mat); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + // Launch a kernel that has nbatches blocks, each block has max group size + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); + }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); @@ -90,7 +124,47 @@ void advanced_apply(std::shared_ptr exec, const batch::matrix::Ell* mat, const batch::MultiVector* b, const batch::MultiVector* beta, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const auto mat_ub = get_batch_struct(mat); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + const auto num_batch_items = mat_ub.num_batch_items; + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + + // Launch a kernel that has nbatches blocks, each block has max group size + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + 
batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); + }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc new file mode 100644 index 00000000000..1048f2f8ff8 --- /dev/null +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -0,0 +1,79 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +template +__dpct_inline__ void simple_apply_kernel( + const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) +{ + for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; + tidx += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { + const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; + if (col_idx < idx) + break; + else + temp += mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; + } + x.values[tidx * x.stride] = temp; + } +} + + +template +__dpct_inline__ void advanced_apply_kernel( + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& beta, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) +{ + for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; + tidx += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { + const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; + if (col_idx < idx) + break; + else + temp += alpha.values[0] * mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; + } + x.values[tidx * x.stride] = + temp + beta.values[0] * x.values[tidx * x.stride]; + } +} diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index b0393daf55d..35ff1148dd5 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -86,6 +86,40 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::batch_ell::uniform_batch +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {op->get_const_values(), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. 
+ */ +template +inline batch::matrix::batch_ell::uniform_batch get_batch_struct( + batch::matrix::Ell* const op) +{ + return {op->get_values(), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace dpcpp } // namespace kernels } // namespace gko diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index f1c91e615e7..a03a0a0bb4e 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,5 +1,5 @@ ginkgo_create_common_test(batch_dense_kernels) -ginkgo_create_common_test(batch_ell_kernels DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_test(batch_ell_kernels) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) diff --git a/test/matrix/batch_ell_kernels.cpp b/test/matrix/batch_ell_kernels.cpp index 9629a2263ff..bc1e0c7fb42 100644 --- a/test/matrix/batch_ell_kernels.cpp +++ b/test/matrix/batch_ell_kernels.cpp @@ -63,22 +63,36 @@ class Ell : public CommonTestFixture { template std::unique_ptr gen_mtx(const gko::size_type num_batch_items, gko::size_type num_rows, - gko::size_type num_cols) + gko::size_type num_cols, + int num_elems_per_row) { return gko::test::generate_random_batch_matrix( + num_batch_items, num_rows, num_cols, + std::uniform_int_distribution<>(num_elems_per_row, + num_elems_per_row), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref, + num_elems_per_row); + } + + std::unique_ptr gen_mvec(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) + { + return gko::test::generate_random_batch_matrix( num_batch_items, num_rows, num_cols, std::uniform_int_distribution<>(num_cols, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } - void set_up_apply_data(gko::size_type num_vecs = 1) + void set_up_apply_data(gko::size_type num_vecs = 1, + int num_elems_per_row = 5) { const int num_rows = 252; const int num_cols = 32; - x = gen_mtx(batch_size, num_rows, num_cols); - y = gen_mtx(batch_size, num_cols, num_vecs); - alpha = gen_mtx(batch_size, 1, 1); - beta = gen_mtx(batch_size, 1, 1); + x = gen_mtx(batch_size, num_rows, num_cols, num_elems_per_row); + y = gen_mvec(batch_size, num_cols, num_vecs); + alpha = gen_mvec(batch_size, 1, 1); + beta = gen_mvec(batch_size, 1, 1); dx = gko::clone(exec, x); dy = gko::clone(exec, y); dalpha = gko::clone(exec, alpha); From 715235cc5f5f4785dff9657dc1543fcca74d2024 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 8 Oct 2023 12:15:00 +0200 Subject: [PATCH 390/583] Update docs --- include/ginkgo/core/matrix/batch_ell.hpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 490f7a7d4b0..48a3a6d9831 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -55,19 +55,15 @@ namespace matrix { /** - * Ell is a batch matrix format which explicitly stores all values of the - * matrix in each of the batches. + * Ell is a sparse matrix format that stores the same number of nonzeros in each + * row, enabling coalesced accesses. It is suitable for sparsity patterns that + * have a similar number of nonzeros in every row. The values are stored in a + * column-major fashion similar to the monolithic gko::matrix::Ell class. 
It is + * also assumed that the sparsity pattern of all the items in the batch is the + * same and therefore only a single copy of the sparsity pattern is stored. * - * The values in each of the batches are stored in row-major format (values - * belonging to the same row appear consecutive in the memory and the values of - * each batch item are also stored consecutively in memory). - * - * @note Though the storage layout is similar to the multi-vector object, the - * class semantics and the operations it aims to provide is different. Hence it - * is recommended to create multi-vector objects if the user means to view the - * data as a set of vectors. - * - * @tparam ValueType precision of matrix elements + * @tparam ValueType value precision of matrix elements + * @tparam IndexType index precision of matrix elements * * @ingroup batch_ell * @ingroup mat_formats From 5cdcedef194ee25d2ca60627bfca7e076aa08e70 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Sun, 8 Oct 2023 10:49:57 +0000 Subject: [PATCH 391/583] Format files Co-authored-by: Pratik Nayak --- dpcpp/matrix/batch_ell_kernels.dp.cpp | 54 +++++++++---------- .../ginkgo/core/base/batch_multi_vector.hpp | 4 +- include/ginkgo/core/matrix/batch_ell.hpp | 8 +-- include/ginkgo/ginkgo.hpp | 1 + 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index 1ed83d79630..1d1210cc270 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -100,17 +100,17 @@ void simple_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -147,22 +147,22 @@ void advanced_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> 
item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 45ba0686468..9a4b8d5cf1d 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -212,8 +212,8 @@ class MultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 48a3a6d9831..5cb5f73dec5 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -214,8 +214,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type* get_const_col_idxs_for_item(size_type batch_id) const - noexcept + const index_type* get_const_col_idxs_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data(); @@ -243,8 +243,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 8bb29242e88..ad90e264189 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -109,6 +109,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include From a2c8e6551912ddbbe4ad0f7cabae3a2567e9b455 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 17:15:48 +0200 Subject: [PATCH 392/583] Some general fixes. 
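This cleanup mostly tidies up the batch Ell infrastructure (see the diffs
below): the matrix struct namespace is renamed from batch_ell to ell and its
index_type is fixed to int32, Ell::create_with_config_of builds the result
directly from the other matrix's executor, size and stored-elements-per-row
instead of going through create_with_same_config, and the apply_impl overloads
delegate their dimension checks to validate_application_parameters instead of
repeating the assertions by hand.

As a rough usage sketch only (not part of this patch): a host-side caller could
walk the renamed structs roughly as follows, where inspect_items is a
hypothetical helper and everything else comes from the structs touched below.

    #include "core/matrix/batch_struct.hpp"

    // Illustrative sketch: iterate over the items of an ell uniform batch view.
    template <typename ValueType>
    void inspect_items(
        const gko::batch::matrix::ell::uniform_batch<const ValueType>& mat)
    {
        for (gko::size_type b = 0; b < mat.num_batch_items; ++b) {
            auto item = gko::batch::matrix::extract_batch_item(mat, b);
            // item.values is offset per batch item, while item.col_idxs points
            // at the single shared copy of the sparsity pattern.
            (void)item;
        }
    }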
--- .../cuda_hip/matrix/batch_ell_kernels.hpp.inc | 13 ++- core/matrix/batch_ell.cpp | 32 +------ core/matrix/batch_struct.hpp | 20 ++--- core/test/matrix/batch_ell.cpp | 8 +- cuda/matrix/batch_dense_kernels.cu | 3 +- cuda/matrix/batch_ell_kernels.cu | 6 +- cuda/matrix/batch_struct.hpp | 23 ++--- dpcpp/matrix/batch_ell_kernels.dp.cpp | 62 +++++++------ dpcpp/matrix/batch_ell_kernels.hpp.inc | 4 +- dpcpp/matrix/batch_struct.hpp | 23 ++--- hip/matrix/batch_ell_kernels.hip.cpp | 6 +- hip/matrix/batch_struct.hip.hpp | 23 ++--- include/ginkgo/core/matrix/batch_ell.hpp | 8 -- omp/matrix/batch_dense_kernels.cpp | 4 +- omp/matrix/batch_ell_kernels.cpp | 4 +- reference/matrix/batch_dense_kernels.cpp | 5 +- reference/matrix/batch_ell_kernels.cpp | 5 +- reference/matrix/batch_ell_kernels.hpp.inc | 4 +- reference/matrix/batch_struct.hpp | 22 ++--- reference/test/matrix/batch_ell_kernels.cpp | 87 ++++++------------- test/matrix/batch_ell_kernels.cpp | 59 ++++++------- test/test_install/test_install.cpp | 9 +- 22 files changed, 184 insertions(+), 246 deletions(-) diff --git a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc index e55e7a60471..5c00358c5a0 100644 --- a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __device__ __forceinline__ void simple_apply( - const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const ValueType* const __restrict__ b, ValueType* const __restrict__ x) { const auto num_rows = mat.num_rows; @@ -60,7 +60,7 @@ template __global__ __launch_bounds__( default_block_size, sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: - batch_ell::uniform_batch< + ell::uniform_batch< const ValueType> mat, const gko::batch:: @@ -88,7 +88,7 @@ __global__ __launch_bounds__( template __device__ __forceinline__ void advanced_apply( const ValueType alpha, - const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const ValueType* const __restrict__ b, const ValueType beta, ValueType* const __restrict__ x) { @@ -121,10 +121,9 @@ __global__ __launch_bounds__( const ValueType> alpha, const gko::batch::matrix:: - batch_ell:: - uniform_batch< - const ValueType> - mat, + ell::uniform_batch< + const ValueType> + mat, const gko::batch:: multi_vector:: uniform_batch< diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 0d903b10968..f421fdf2b49 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -104,22 +104,10 @@ template std::unique_ptr> Ell::create_with_config_of( ptr_param> other) -{ - // De-referencing `other` before calling the functions (instead of - // using operator `->`) is currently required to be compatible with - // CUDA 10.1. - // Otherwise, it results in a compile error. 
- return (*other).create_with_same_config(); -} - - -template -std::unique_ptr> -Ell::create_with_same_config() const { return Ell::create( - this->get_executor(), this->get_size(), - this->get_num_stored_elements_per_row()); + other->get_executor(), other->get_size(), + other->get_num_stored_elements_per_row()); } @@ -163,12 +151,7 @@ template void Ell::apply_impl(const MultiVector* b, MultiVector* x) const { - GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); - GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + this->validate_application_parameters(b, x); this->get_executor()->run(ell::make_simple_apply(this, b, x)); } @@ -179,14 +162,7 @@ void Ell::apply_impl(const MultiVector* alpha, const MultiVector* beta, MultiVector* x) const { - GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); - GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), gko::dim<2>(1, 1)); - GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); + this->validate_application_parameters(alpha, b, beta, x); this->get_executor()->run( ell::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index 2eed40882bc..eeeeebd53d6 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -83,7 +83,7 @@ struct uniform_batch { } // namespace dense -namespace batch_ell { +namespace ell { /** @@ -109,7 +109,7 @@ struct batch_item { template struct uniform_batch { using value_type = ValueType; - using index_type = int; + using index_type = int32; using entry_type = batch_item; ValueType* values; @@ -127,7 +127,7 @@ struct uniform_batch { }; -} // namespace batch_ell +} // namespace ell template @@ -165,8 +165,8 @@ GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( template -GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item to_const( - const batch_ell::batch_item& b) +GKO_ATTRIBUTES GKO_INLINE ell::batch_item to_const( + const ell::batch_item& b) { return {b.values, b.col_idxs, b.stride, b.num_rows, b.num_cols, b.num_stored_elems_per_row}; @@ -174,8 +174,8 @@ GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item to_const( template -GKO_ATTRIBUTES GKO_INLINE batch_ell::uniform_batch to_const( - const batch_ell::uniform_batch& ub) +GKO_ATTRIBUTES GKO_INLINE ell::uniform_batch to_const( + const ell::uniform_batch& ub) { return {ub.values, ub.col_idxs, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_cols, ub.num_stored_elems_per_row}; @@ -183,8 +183,8 @@ GKO_ATTRIBUTES GKO_INLINE batch_ell::uniform_batch to_const( template -GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( - const batch_ell::uniform_batch& batch, const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE ell::batch_item extract_batch_item( + const ell::uniform_batch& batch, const size_type batch_idx) { return {batch.values + batch_idx * batch.num_stored_elems_per_row * batch.num_rows, @@ -196,7 +196,7 @@ GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( } template 
-GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( +GKO_ATTRIBUTES GKO_INLINE ell::batch_item extract_batch_item( ValueType* const batch_values, int* const batch_col_idxs, const int stride, const int num_rows, const int num_cols, int num_elems_per_row, const size_type batch_idx) diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp index 2830705bf5f..e4dcab23917 100644 --- a/core/test/matrix/batch_ell.cpp +++ b/core/test/matrix/batch_ell.cpp @@ -144,6 +144,7 @@ TYPED_TEST(Ell, SparseMtxKnowsItsSizeAndValues) TYPED_TEST(Ell, CanBeEmpty) { auto empty = gko::batch::matrix::Ell::create(this->exec); + this->assert_empty(empty.get()); } @@ -151,6 +152,7 @@ TYPED_TEST(Ell, CanBeEmpty) TYPED_TEST(Ell, ReturnsNullValuesArrayWhenEmpty) { auto empty = gko::batch::matrix::Ell::create(this->exec); + ASSERT_EQ(empty->get_const_values(), nullptr); } @@ -284,7 +286,6 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatrices) using value_type = typename TestFixture::value_type; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, this->exec); auto mat2 = @@ -304,15 +305,14 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) using index_type = int; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); - auto bat_m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}, mat1->get_num_stored_elements_per_row()); + auto m = gko::batch::create_from_item>( this->exec, 3, mat1.get(), mat1->get_num_stored_elements_per_row()); @@ -326,7 +326,6 @@ TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) using index_type = int; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 0.0}}, this->exec); auto mat2 = @@ -372,6 +371,7 @@ TYPED_TEST(Ell, CanBeListConstructed) { using value_type = typename TestFixture::value_type; using index_type = int; + auto m = gko::batch::initialize>( {{0.0, -1.0}, {1.0, 0.0}}, this->exec); diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index dd82e15b8cc..c693a3ae861 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -36,7 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu index ee6a99f04ca..6dd268a2d8e 100644 --- a/cuda/matrix/batch_ell_kernels.cu +++ b/cuda/matrix/batch_ell_kernels.cu @@ -34,18 +34,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" #include "cuda/base/batch_struct.hpp" #include "cuda/base/config.hpp" -#include "cuda/base/cublas_bindings.hpp" -#include "cuda/base/pointer_mode_guard.hpp" #include "cuda/base/thrust.cuh" #include "cuda/components/cooperative_groups.cuh" #include "cuda/components/reduction.cuh" diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 7a6a4ac7f00..e2db1ea6e97 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include "core/base/batch_struct.hpp" @@ -91,16 +92,16 @@ get_batch_struct(batch::matrix::Dense* const op) * Generates an immutable uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch> +inline batch::matrix::ell::uniform_batch> get_batch_struct(const batch::matrix::Ell* const op) { return {as_cuda_type(op->get_const_values()), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } @@ -108,16 +109,16 @@ get_batch_struct(const batch::matrix::Ell* const op) * Generates a uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch> -get_batch_struct(batch::matrix::Ell* const op) +inline batch::matrix::ell::uniform_batch> get_batch_struct( + batch::matrix::Ell* const op) { return {as_cuda_type(op->get_values()), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index 1d1210cc270..fca265eceb0 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -39,17 +39,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include #include -#include #include #include "core/base/batch_struct.hpp" -#include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/batch_struct.hpp" #include "dpcpp/base/batch_struct.hpp" -#include "dpcpp/base/config.hpp" #include "dpcpp/base/dim3.dp.hpp" #include "dpcpp/base/dpct.hpp" #include "dpcpp/base/helper.hpp" @@ -98,19 +94,19 @@ void simple_apply(std::shared_ptr exec, } // Launch a kernel that has nbatches blocks, each block has max group size - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -145,24 +141,24 @@ void advanced_apply(std::shared_ptr exec, const dim3 grid(num_batch_items); // Launch a kernel that has nbatches blocks, each block has max group size - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc index 1048f2f8ff8..7500ae9e060 100644 --- a/dpcpp/matrix/batch_ell_kernels.hpp.inc +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template __dpct_inline__ void simple_apply_kernel( - const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& x, sycl::nd_item<3>& item_ct1) @@ -56,7 +56,7 @@ __dpct_inline__ void simple_apply_kernel( template __dpct_inline__ void advanced_apply_kernel( const gko::batch::multi_vector::batch_item& alpha, - const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& beta, const gko::batch::multi_vector::batch_item& x, diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index 35ff1148dd5..f857653e05e 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "core/base/batch_struct.hpp" @@ -90,16 +91,16 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( * Generates an immutable uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch -get_batch_struct(const batch::matrix::Ell* const op) +inline batch::matrix::ell::uniform_batch get_batch_struct( + const batch::matrix::Ell* const op) { return {op->get_const_values(), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } @@ -107,16 +108,16 @@ get_batch_struct(const batch::matrix::Ell* const op) * Generates a uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch get_batch_struct( +inline batch::matrix::ell::uniform_batch get_batch_struct( batch::matrix::Ell* const op) { return {op->get_values(), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp index fdd52c38f57..5c6d5179a21 100644 --- a/hip/matrix/batch_ell_kernels.hip.cpp +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -35,18 +35,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" #include "hip/base/batch_struct.hip.hpp" #include "hip/base/config.hip.hpp" -#include "hip/base/hipblas_bindings.hip.hpp" -#include "hip/base/pointer_mode_guard.hip.hpp" #include "hip/base/thrust.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" #include "hip/components/reduction.hip.hpp" diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index a43d7d058b0..6f15b2d966a 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "core/base/batch_struct.hpp" @@ -91,16 +92,16 @@ get_batch_struct(batch::matrix::Dense* const op) * Generates an immutable uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch> +inline batch::matrix::ell::uniform_batch> get_batch_struct(const batch::matrix::Ell* const op) { return {as_hip_type(op->get_const_values()), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } @@ -108,16 +109,16 @@ get_batch_struct(const batch::matrix::Ell* const op) * Generates a uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch> -get_batch_struct(batch::matrix::Ell* const op) +inline batch::matrix::ell::uniform_batch> get_batch_struct( + batch::matrix::Ell* const op) { return {as_hip_type(op->get_values()), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 5cb5f73dec5..6f3db1bb96b 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -356,14 +356,6 @@ class Ell final col_idxs_.get_num_elems()); } - /** - * Creates a Ell matrix with the same configuration as the callers - * matrix. - * - * @returns a Ell matrix with the same configuration as the caller. - */ - std::unique_ptr create_with_same_config() const; - void apply_impl(const MultiVector* b, MultiVector* x) const; diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp index 2d0b7ed4d40..b91a4133dba 100644 --- a/omp/matrix/batch_dense_kernels.cpp +++ b/omp/matrix/batch_dense_kernels.cpp @@ -36,8 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
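All of these get_batch_struct overloads (HIP here, CUDA above, DPC++ and reference further down) populate the same non-owning view type declared in core/matrix/batch_struct.hpp. The host-side sketch below spells out the field mapping for a hypothetical non-const operator op of type gko::batch::matrix::Ell<float, gko::int32>*; the header is internal to Ginkgo, so this is illustrative only.

    // Aggregate order follows ell::uniform_batch: values, col_idxs,
    // num_batch_items, stride, num_rows, num_cols, num_stored_elems_per_row.
    // Note that the stride is set to the number of rows, matching the
    // column-major per-item storage of the batch Ell format.
    gko::batch::matrix::ell::uniform_batch<float, gko::int32> view{
        op->get_values(),    // one flat value array covering all batch items
        op->get_col_idxs(),  // single column-index array shared by all items
        op->get_num_batch_items(),
        static_cast<gko::int32>(op->get_common_size()[0]),  // stride
        static_cast<gko::int32>(op->get_common_size()[0]),  // num_rows
        static_cast<gko::int32>(op->get_common_size()[1]),  // num_cols
        static_cast<gko::int32>(op->get_num_stored_elements_per_row())};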
#include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/omp/matrix/batch_ell_kernels.cpp b/omp/matrix/batch_ell_kernels.cpp index 20ea4614e7d..17710a97366 100644 --- a/omp/matrix/batch_ell_kernels.cpp +++ b/omp/matrix/batch_ell_kernels.cpp @@ -36,8 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp index 3d7ef03a3bd..87d73bb8e34 100644 --- a/reference/matrix/batch_dense_kernels.cpp +++ b/reference/matrix/batch_dense_kernels.cpp @@ -36,9 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/reference/matrix/batch_ell_kernels.cpp b/reference/matrix/batch_ell_kernels.cpp index a3f69827c02..1d3a0e1ef94 100644 --- a/reference/matrix/batch_ell_kernels.cpp +++ b/reference/matrix/batch_ell_kernels.cpp @@ -36,9 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc index 41d0a00ddcd..44de2a57af9 100644 --- a/reference/matrix/batch_ell_kernels.hpp.inc +++ b/reference/matrix/batch_ell_kernels.hpp.inc @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template inline void simple_apply_kernel( - const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::matrix::ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& c) { @@ -55,7 +55,7 @@ inline void simple_apply_kernel( template inline void advanced_apply_kernel( const ValueType alpha, - const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::matrix::ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const ValueType beta, const gko::batch::multi_vector::batch_item& c) diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 3b562450ee0..fb0e08c16f5 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -95,16 +95,16 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( * Generates an immutable uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch -get_batch_struct(const batch::matrix::Ell* const op) +inline batch::matrix::ell::uniform_batch get_batch_struct( + const batch::matrix::Ell* const op) { return {op->get_const_values(), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } @@ -112,16 +112,16 @@ get_batch_struct(const batch::matrix::Ell* const op) * Generates a uniform batch struct from a batch of ell matrices. 
*/ template -inline batch::matrix::batch_ell::uniform_batch get_batch_struct( +inline batch::matrix::ell::uniform_batch get_batch_struct( batch::matrix::Ell* const op) { return {op->get_values(), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/reference/test/matrix/batch_ell_kernels.cpp b/reference/test/matrix/batch_ell_kernels.cpp index 76b681c69f7..8a5806a9513 100644 --- a/reference/test/matrix/batch_ell_kernels.cpp +++ b/reference/test/matrix/batch_ell_kernels.cpp @@ -58,15 +58,13 @@ class Ell : public ::testing::Test { protected: using value_type = T; using size_type = gko::size_type; - using Mtx = gko::batch::matrix::Ell; - using MVec = gko::batch::MultiVector; + using BMtx = gko::batch::matrix::Ell; + using BMVec = gko::batch::MultiVector; using EllMtx = gko::matrix::Ell; using DenseMtx = gko::matrix::Dense; - using ComplexMtx = gko::to_complex; - using RealMtx = gko::remove_complex; Ell() : exec(gko::ReferenceExecutor::create()), - mtx_0(gko::batch::initialize( + mtx_0(gko::batch::initialize( {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, exec)), @@ -74,7 +72,7 @@ class Ell : public ::testing::Test { {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), mtx_01(gko::initialize( {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), - b_0(gko::batch::initialize( + b_0(gko::batch::initialize( {{I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), I({1.0, 0.0, 2.0})}, {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), @@ -88,7 +86,7 @@ class Ell : public ::testing::Test { {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), I({1.0, 0.0, 2.0})}, exec)), - x_0(gko::batch::initialize( + x_0(gko::batch::initialize( {{I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}}, exec)), @@ -99,13 +97,13 @@ class Ell : public ::testing::Test { {} std::shared_ptr exec; - std::unique_ptr mtx_0; + std::unique_ptr mtx_0; std::unique_ptr mtx_00; std::unique_ptr mtx_01; - std::unique_ptr b_0; + std::unique_ptr b_0; std::unique_ptr b_00; std::unique_ptr b_01; - std::unique_ptr x_0; + std::unique_ptr x_0; std::unique_ptr x_00; std::unique_ptr x_01; @@ -121,38 +119,10 @@ TYPED_TEST(Ell, AppliesToBatchMultiVector) using T = typename TestFixture::value_type; this->mtx_0->apply(this->b_0.get(), this->x_0.get()); + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); this->mtx_01->apply(this->b_01.get(), this->x_01.get()); - - auto res = gko::batch::unbatch>(this->x_0.get()); - - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); -} - - -TYPED_TEST(Ell, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) -{ - using Mtx = typename TestFixture::Mtx; - using MVec = typename TestFixture::MVec; - using DenseMtx = typename TestFixture::DenseMtx; - using T = typename TestFixture::value_type; - auto alpha = gko::batch::initialize(2, {1.5}, this->exec); - auto beta = gko::batch::initialize(2, {-4.0}, this->exec); - auto alpha0 = gko::initialize({1.5}, this->exec); - auto alpha1 = gko::initialize({1.5}, this->exec); - auto beta0 = gko::initialize({-4.0}, this->exec); - auto beta1 = gko::initialize({-4.0}, this->exec); - - 
this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), - this->x_0.get()); - this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), - this->x_00.get()); - this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), - this->x_01.get()); - auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); } @@ -160,12 +130,12 @@ TYPED_TEST(Ell, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) { - using Mtx = typename TestFixture::Mtx; - using MVec = typename TestFixture::MVec; + using BMtx = typename TestFixture::BMtx; + using BMVec = typename TestFixture::BMVec; using DenseMtx = typename TestFixture::DenseMtx; using T = typename TestFixture::value_type; - auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); - auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); auto alpha0 = gko::initialize({1.5}, this->exec); auto alpha1 = gko::initialize({-1.0}, this->exec); auto beta0 = gko::initialize({2.5}, this->exec); @@ -173,13 +143,12 @@ TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), this->x_00.get()); this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), this->x_01.get()); - auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); } @@ -187,8 +156,8 @@ TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultCols) { - using MVec = typename TestFixture::MVec; - auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); + using BMVec = typename TestFixture::BMVec; + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), gko::DimensionMismatch); @@ -197,8 +166,8 @@ TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultCols) TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultRows) { - using MVec = typename TestFixture::MVec; - auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); + using BMVec = typename TestFixture::BMVec; + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), gko::DimensionMismatch); @@ -207,9 +176,9 @@ TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultRows) TYPED_TEST(Ell, ApplyFailsOnWrongInnerDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()), gko::DimensionMismatch); @@ -218,13 +187,13 @@ TYPED_TEST(Ell, ApplyFailsOnWrongInnerDimension) TYPED_TEST(Ell, AdvancedApplyFailsOnWrongInnerDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); auto alpha = - MVec::create(this->exec, 
gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); auto beta = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); ASSERT_THROW( this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), @@ -234,13 +203,13 @@ TYPED_TEST(Ell, AdvancedApplyFailsOnWrongInnerDimension) TYPED_TEST(Ell, AdvancedApplyFailsOnWrongAlphaDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); auto alpha = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); auto beta = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); ASSERT_THROW( this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), diff --git a/test/matrix/batch_ell_kernels.cpp b/test/matrix/batch_ell_kernels.cpp index bc1e0c7fb42..083af0a0938 100644 --- a/test/matrix/batch_ell_kernels.cpp +++ b/test/matrix/batch_ell_kernels.cpp @@ -55,18 +55,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. class Ell : public CommonTestFixture { protected: - using Mtx = gko::batch::matrix::Ell; - using MVec = gko::batch::MultiVector; + using BMtx = gko::batch::matrix::Ell; + using BMVec = gko::batch::MultiVector; Ell() : rand_engine(15) {} - template - std::unique_ptr gen_mtx(const gko::size_type num_batch_items, - gko::size_type num_rows, - gko::size_type num_cols, - int num_elems_per_row) + template + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols, + int num_elems_per_row) { - return gko::test::generate_random_batch_matrix( + return gko::test::generate_random_batch_matrix( num_batch_items, num_rows, num_cols, std::uniform_int_distribution<>(num_elems_per_row, num_elems_per_row), @@ -74,11 +74,11 @@ class Ell : public CommonTestFixture { num_elems_per_row); } - std::unique_ptr gen_mvec(const gko::size_type num_batch_items, - gko::size_type num_rows, - gko::size_type num_cols) + std::unique_ptr gen_mvec(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) { - return gko::test::generate_random_batch_matrix( + return gko::test::generate_random_batch_matrix( num_batch_items, num_rows, num_cols, std::uniform_int_distribution<>(num_cols, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); @@ -89,15 +89,16 @@ class Ell : public CommonTestFixture { { const int num_rows = 252; const int num_cols = 32; - x = gen_mtx(batch_size, num_rows, num_cols, num_elems_per_row); + GKO_ASSERT(num_elems_per_row <= num_cols); + mat = gen_mtx(batch_size, num_rows, num_cols, num_elems_per_row); y = gen_mvec(batch_size, num_cols, num_vecs); alpha = gen_mvec(batch_size, 1, 1); beta = gen_mvec(batch_size, 1, 1); - dx = gko::clone(exec, x); + dmat = gko::clone(exec, mat); dy = gko::clone(exec, y); dalpha = gko::clone(exec, alpha); dbeta = gko::clone(exec, beta); - expected = MVec::create( + expected = BMVec::create( ref, gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); expected->fill(gko::one()); @@ -107,16 +108,16 @@ class Ell : public CommonTestFixture { std::ranlux48 rand_engine; const size_t batch_size 
= 11; - std::unique_ptr x; - std::unique_ptr y; - std::unique_ptr alpha; - std::unique_ptr beta; - std::unique_ptr expected; - std::unique_ptr dresult; - std::unique_ptr dx; - std::unique_ptr dy; - std::unique_ptr dalpha; - std::unique_ptr dbeta; + std::unique_ptr mat; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr dresult; + std::unique_ptr dmat; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; }; @@ -124,8 +125,8 @@ TEST_F(Ell, SingleVectorApplyIsEquivalentToRef) { set_up_apply_data(1); - x->apply(y.get(), expected.get()); - dx->apply(dy.get(), dresult.get()); + mat->apply(y.get(), expected.get()); + dmat->apply(dy.get(), dresult.get()); GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } @@ -135,8 +136,8 @@ TEST_F(Ell, SingleVectorAdvancedApplyIsEquivalentToRef) { set_up_apply_data(1); - x->apply(alpha.get(), y.get(), beta.get(), expected.get()); - dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + mat->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmat->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index 7e53ea8f165..c00bb594ecd 100644 --- a/test/test_install/test_install.cpp +++ b/test/test_install/test_install.cpp @@ -219,13 +219,20 @@ int main() auto test = batch_multi_vector_type::create(exec); } - // core/base/batch_dense.hpp + // core/matrix/batch_dense.hpp { using type1 = float; using batch_dense_type = gko::batch::matrix::Dense; auto test = batch_dense_type::create(exec); } + // core/matrix/batch_ell.hpp + { + using type1 = float; + using batch_ell_type = gko::batch::matrix::Ell; + auto test = batch_ell_type::create(exec); + } + // core/base/combination.hpp { using type1 = int; From b4d877fbe4e6e49371f6d682d87b2c5127f9fffa Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 11 Oct 2023 14:07:13 +0200 Subject: [PATCH 393/583] Kernel updates and batch_random_matrix gen --- .../cuda_hip/matrix/batch_ell_kernels.hpp.inc | 4 +- core/matrix/batch_ell.cpp | 7 -- core/test/utils/batch_helpers.hpp | 17 +++- core/test/utils/matrix_generator.hpp | 90 +++++++++++++++++++ cuda/matrix/batch_ell_kernels.cu | 1 + dpcpp/matrix/batch_ell_kernels.hpp.inc | 57 ++++++------ hip/matrix/batch_ell_kernels.hip.cpp | 1 + include/ginkgo/core/matrix/batch_ell.hpp | 19 ++-- test/matrix/batch_ell_kernels.cpp | 2 +- 9 files changed, 149 insertions(+), 49 deletions(-) diff --git a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc index 5c00358c5a0..19c29f14aa8 100644 --- a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc @@ -46,7 +46,7 @@ __device__ __forceinline__ void simple_apply( for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; - if (col_idx < idx) { + if (col_idx == invalid_index()) { break; } else { temp += val[ind] * b[col_idx]; @@ -102,7 +102,7 @@ __device__ __forceinline__ void advanced_apply( for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; - if (col_idx < idx) { + if (col_idx == invalid_index()) { break; } else { temp += alpha * val[ind] * b[col_idx]; diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 
f421fdf2b49..c9dbe6d51c9 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -128,13 +128,6 @@ Ell::create_const( } -inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) -{ - return batch_dim<2>(sizes.get_num_batch_items(), - dim<2>(1, sizes.get_common_size()[1])); -} - - template Ell::Ell(std::shared_ptr exec, const batch_dim<2>& size, diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp index b040691999e..0b6197b5062 100644 --- a/core/test/utils/batch_helpers.hpp +++ b/core/test/utils/batch_helpers.hpp @@ -82,11 +82,22 @@ std::unique_ptr generate_random_batch_matrix( auto result = MatrixType::create( exec, batch_dim<2>(num_batch_items, dim<2>(num_rows, num_cols)), std::forward(args)...); + auto sp_mat = generate_random_device_matrix_data( + num_rows, num_cols, nonzero_dist, value_dist, engine, + exec->get_master()); + auto row_idxs = gko::array::const_view( + exec->get_master(), sp_mat.get_num_elems(), + sp_mat.get_const_row_idxs()) + .copy_to_array(); + auto col_idxs = gko::array::const_view( + exec->get_master(), sp_mat.get_num_elems(), + sp_mat.get_const_col_idxs()) + .copy_to_array(); for (size_type b = 0; b < num_batch_items; b++) { - auto rand_mat = - generate_random_matrix( - num_rows, num_cols, nonzero_dist, value_dist, engine, exec); + auto rand_mat = fill_random_matrix_with_sparsity_pattern< + typename MatrixType::unbatch_type, index_type>( + num_rows, num_cols, row_idxs, col_idxs, value_dist, engine, exec); result->create_view_for_item(b)->copy_from(rand_mat.get()); } diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index 6928c5424a5..8a82ae744e7 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -54,6 +55,49 @@ namespace gko { namespace test { +/** + * Fills matrix data for a random matrix given a sparsity pattern + * + * @tparam ValueType the type for matrix values + * @tparam IndexType the type for row and column indices + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine + * + * @param num_rows number of rows + * @param num_cols number of columns + * @param row_idxs the row indices of the matrix + * @param col_idxs the column indices of the matrix + * @param value_dist distribution of matrix values + * @param engine a random engine + * + * @return the generated matrix_data with entries according to the given + * dimensions and nonzero count and value distributions. + */ +template +matrix_data fill_random_matrix_data( + size_type num_rows, size_type num_cols, + const gko::array& row_indices, + const gko::array& col_indices, ValueDistribution&& value_dist, + Engine&& engine) +{ + matrix_data data{gko::dim<2>{num_rows, num_cols}, {}}; + auto host_exec = row_indices.get_executor()->get_master(); + auto host_row_indices = make_temporary_clone(host_exec, &row_indices); + auto host_col_indices = make_temporary_clone(host_exec, &col_indices); + + for (int nnz = 0; nnz < row_indices.get_num_elems(); ++nnz) { + data.nonzeros.emplace_back( + host_row_indices->get_const_data()[nnz], + host_col_indices->get_const_data()[nnz], + detail::get_rand_value(value_dist, engine)); + } + + data.ensure_row_major_order(); + return data; +} + + /** * Generates matrix data for a random matrix. 
* @@ -156,6 +200,48 @@ generate_random_device_matrix_data(gko::size_type num_rows, } +/** + * Fills a random matrix with given sparsity pattern. + * + * @tparam MatrixType type of matrix to generate (must implement + * the interface `ReadableFromMatrixData<>` and provide + * matching `value_type` and `index_type` type aliases) + * + * @param num_rows number of rows + * @param num_cols number of columns + * @param value_dist distribution of matrix values + * @param row_idxs the row indices of the matrix + * @param col_idxs the column indices of the matrix + * @param exec executor where the matrix should be allocated + * @param args additional arguments for the matrix constructor + * + * The other (template) parameters match generate_random_matrix_data. + * + * @return the unique pointer of MatrixType + */ +template , + typename IndexType = typename MatrixType::index_type, + typename ValueDistribution, typename Engine, typename... MatrixArgs> +std::unique_ptr fill_random_matrix_with_sparsity_pattern( + size_type num_rows, size_type num_cols, + const gko::array& row_idxs, + const gko::array& col_idxs, ValueDistribution&& value_dist, + Engine&& engine, std::shared_ptr exec, MatrixArgs&&... args) +{ + using value_type = typename MatrixType::value_type; + using index_type = IndexType; + + GKO_ASSERT(row_idxs.get_num_elems() == col_idxs.get_num_elems()); + GKO_ASSERT(row_idxs.get_num_elems() < (num_rows * num_cols)); + auto result = MatrixType::create(exec, std::forward(args)...); + result->read(fill_random_matrix_data( + num_rows, num_cols, row_idxs, col_idxs, + std::forward(value_dist), + std::forward(engine))); + return result; +} + + /** * Generates a random matrix. * @@ -163,6 +249,10 @@ generate_random_device_matrix_data(gko::size_type num_rows, * the interface `ReadableFromMatrixData<>` and provide * matching `value_type` and `index_type` type aliases) * + * @param num_rows number of rows + * @param num_cols number of columns + * @param nonzero_dist distribution of nonzeros per row + * @param value_dist distribution of matrix values * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor * diff --git a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu index 6dd268a2d8e..5cadd7755a2 100644 --- a/cuda/matrix/batch_ell_kernels.cu +++ b/cuda/matrix/batch_ell_kernels.cu @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
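With the two helpers above, generate_random_batch_matrix (see core/test/utils/batch_helpers.hpp earlier in this patch) draws the sparsity pattern once and only re-randomizes the values for each batch item, which is what the shared-pattern batch Ell format requires. The usage sketch below mirrors the gen_mtx helper in the tests further down; the executor exec, the fixed five stored elements per row, and the double/gko::int32 instantiation are assumptions, and these helpers live in Ginkgo's internal test utilities rather than the public API.

    #include <random>

    // Sketch: all 11 batch items share one pattern with 5 stored elements per
    // row, but their values are drawn independently.
    using BMtx = gko::batch::matrix::Ell<double, gko::int32>;

    std::ranlux48 engine(15);
    const int num_elems_per_row = 5;
    auto batch_mat = gko::test::generate_random_batch_matrix<BMtx>(
        11, 252, 32,
        std::uniform_int_distribution<>(num_elems_per_row, num_elems_per_row),
        std::normal_distribution<>(-1.0, 1.0), engine, exec,
        // additional argument forwarded to BMtx::create:
        num_elems_per_row);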
#include +#include #include diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc index 7500ae9e060..e6501bafaba 100644 --- a/dpcpp/matrix/batch_ell_kernels.hpp.inc +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -42,38 +42,37 @@ __dpct_inline__ void simple_apply_kernel( auto temp = zero(); for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx < idx) + if (col_idx == invalid_index()) { break; - else - temp += mat.values[tidx + idx * mat.stride] * - b.values[col_idx * b.stride]; + else temp += mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; + } + x.values[tidx * x.stride] = temp; } - x.values[tidx * x.stride] = temp; } -} -template -__dpct_inline__ void advanced_apply_kernel( - const gko::batch::multi_vector::batch_item& alpha, - const gko::batch::matrix::ell::batch_item& mat, - const gko::batch::multi_vector::batch_item& b, - const gko::batch::multi_vector::batch_item& beta, - const gko::batch::multi_vector::batch_item& x, - sycl::nd_item<3>& item_ct1) -{ - for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; - tidx += item_ct1.get_local_range().size()) { - auto temp = zero(); - for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { - const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx < idx) - break; - else - temp += alpha.values[0] * mat.values[tidx + idx * mat.stride] * - b.values[col_idx * b.stride]; + template + __dpct_inline__ void advanced_apply_kernel( + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& beta, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) + { + for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; + tidx += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { + const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; + if (col_idx == invalid_index()) { + break; + else temp += alpha.values[0] * + mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; + } + x.values[tidx * x.stride] = + temp + beta.values[0] * x.values[tidx * x.stride]; + } } - x.values[tidx * x.stride] = - temp + beta.values[0] * x.values[tidx * x.stride]; - } -} diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp index 5c6d5179a21..96e7cdb298e 100644 --- a/hip/matrix/batch_ell_kernels.hip.cpp +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 6f3db1bb96b..be49e2cff41 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -58,9 +58,14 @@ namespace matrix { * Ell is a sparse matrix format that stores the same number of nonzeros in each * row, enabling coalesced accesses. It is suitable for sparsity patterns that * have a similar number of nonzeros in every row. The values are stored in a - * column-major fashion similar to the monolithic gko::matrix::Ell class. It is - * also assumed that the sparsity pattern of all the items in the batch is the - * same and therefore only a single copy of the sparsity pattern is stored. 
+ * column-major fashion similar to the monolithic gko::matrix::Ell class. + * + * Similar to the monolithic gko::matrix::Ell class, invalid_index is + * used as the column index for padded zero entries. + * + * @note It is also assumed that the sparsity pattern of all the items in the + * batch is the same and therefore only a single copy of the sparsity pattern is + * stored. * * @tparam ValueType value precision of matrix elements * @tparam IndexType index precision of matrix elements @@ -253,13 +258,13 @@ class Ell final /** * Creates a constant (immutable) batch ell matrix from a constant - * array. + * array. The column indices array needs to be the same for all batch items. * * @param exec the executor to create the matrix on * @param size the dimensions of the matrix * @param num_elems_per_row the number of elements to be stored in each row * @param values the value array of the matrix - * @param col_idxs the col_idxs array of the matrix + * @param col_idxs the col_idxs array of a single batch item of the matrix. * * @return A smart pointer to the constant matrix wrapping the input * array (if it resides on the same executor as the matrix) or a copy of the @@ -325,7 +330,7 @@ class Ell final /** * Creates a Ell matrix from an already allocated (and initialized) - * array. + * array. The column indices array needs to be the same for all batch items. * * @tparam ValuesArray type of array of values * @@ -333,7 +338,7 @@ class Ell final * @param size size of the matrix * @param num_elems_per_row the number of elements to be stored in each row * @param values array of matrix values - * @param col_idxs the col_idxs array of the matrix + * @param col_idxs the col_idxs array of a single batch item of the matrix. * * @note If `values` is not an rvalue, not an array of ValueType, or is on * the wrong executor, an internal copy will be created, and the diff --git a/test/matrix/batch_ell_kernels.cpp b/test/matrix/batch_ell_kernels.cpp index 083af0a0938..572f47ba47d 100644 --- a/test/matrix/batch_ell_kernels.cpp +++ b/test/matrix/batch_ell_kernels.cpp @@ -55,7 +55,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
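To make the padding convention concrete: for the two-item, 2x3 batch used by the core unit tests (two stored elements per row), the column-major, shared-pattern storage looks as follows. Row 0 of each item has only one nonzero, so its second slot holds a zero value and the invalid_index sentinel, which is -1 for gko::int32. This mirrors the data in core/test/matrix/batch_ell.cpp and is shown purely for illustration.

    // Item 0: [[-1.0, 0.0, 0.0], [0.0, 2.5, 3.5]]
    // Item 1: [[ 1.0, 0.0, 0.0], [0.0, 2.0, 3.0]]
    double values[] = {-1.0, 2.5,   // item 0, slot 0 (rows 0 and 1)
                        0.0, 3.5,   // item 0, slot 1 (row 0 is padding)
                        1.0, 2.0,   // item 1, slot 0
                        0.0, 3.0};  // item 1, slot 1 (row 0 is padding)
    gko::int32 col_idxs[] = {0, 1,    // slot 0: shared by both items
                             -1, 2};  // slot 1: -1 == invalid_index<gko::int32>()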
class Ell : public CommonTestFixture { protected: - using BMtx = gko::batch::matrix::Ell; + using BMtx = gko::batch::matrix::Ell; using BMVec = gko::batch::MultiVector; Ell() : rand_engine(15) {} From ea785065fbb5ef33a3f6061a4e2db6b904214a8a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 11 Oct 2023 15:41:55 +0200 Subject: [PATCH 394/583] Review updates Co-authored-by: Marcel Koch Co-authored-by: Yu-Hsiang Tsai --- .../cuda_hip/matrix/batch_ell_kernels.hpp.inc | 22 +-- core/base/batch_multi_vector.cpp | 21 --- core/base/batch_utilities.hpp | 47 ++--- core/matrix/batch_struct.hpp | 40 ++--- core/test/matrix/batch_ell.cpp | 160 ++++++++---------- core/test/utils/matrix_generator.hpp | 2 +- cuda/matrix/batch_struct.hpp | 28 +-- dpcpp/matrix/batch_ell_kernels.dp.cpp | 2 + dpcpp/matrix/batch_ell_kernels.hpp.inc | 61 +++---- dpcpp/matrix/batch_struct.hpp | 28 +-- hip/matrix/batch_struct.hip.hpp | 28 +-- .../ginkgo/core/base/batch_multi_vector.hpp | 18 +- include/ginkgo/core/matrix/batch_dense.hpp | 2 - include/ginkgo/core/matrix/batch_ell.hpp | 7 +- reference/matrix/batch_ell_kernels.hpp.inc | 24 +-- reference/matrix/batch_struct.hpp | 28 +-- reference/test/matrix/batch_ell_kernels.cpp | 8 +- 17 files changed, 235 insertions(+), 291 deletions(-) diff --git a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc index 19c29f14aa8..de6ca879890 100644 --- a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc @@ -31,9 +31,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -template +template __device__ __forceinline__ void simple_apply( - const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const ValueType* const __restrict__ b, ValueType* const __restrict__ x) { const auto num_rows = mat.num_rows; @@ -46,7 +46,7 @@ __device__ __forceinline__ void simple_apply( for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; - if (col_idx == invalid_index()) { + if (col_idx == invalid_index()) { break; } else { temp += val[ind] * b[col_idx]; @@ -56,12 +56,13 @@ __device__ __forceinline__ void simple_apply( } } -template +template __global__ __launch_bounds__( default_block_size, sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: ell::uniform_batch< - const ValueType> + const ValueType, + IndexType> mat, const gko::batch:: multi_vector:: @@ -85,10 +86,10 @@ __global__ __launch_bounds__( } -template +template __device__ __forceinline__ void advanced_apply( const ValueType alpha, - const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const ValueType* const __restrict__ b, const ValueType beta, ValueType* const __restrict__ x) { @@ -102,7 +103,7 @@ __device__ __forceinline__ void advanced_apply( for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; - if (col_idx == invalid_index()) { + if (col_idx == invalid_index()) { break; } else { temp += alpha * val[ind] * b[col_idx]; @@ -112,7 +113,7 @@ __device__ __forceinline__ void advanced_apply( } } -template +template __global__ __launch_bounds__( default_block_size, sm_oversubscription) void advanced_apply_kernel(const gko::batch:: @@ -122,7 +123,8 @@ __global__ __launch_bounds__( alpha, const 
gko::batch::matrix:: ell::uniform_batch< - const ValueType> + const ValueType, + IndexType> mat, const gko::batch:: multi_vector:: diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 6a14919bf2f..6dcf8dd90b5 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -291,27 +291,6 @@ void MultiVector::move_to( } -template -void MultiVector::convert_to(matrix::Dense* result) const -{ - auto exec = result->get_executor() == nullptr ? this->get_executor() - : result->get_executor(); - auto tmp = gko::batch::matrix::Dense::create_const( - exec, this->get_size(), - make_const_array_view(this->get_executor(), - this->get_num_stored_elements(), - this->get_const_values())); - result->copy_from(tmp); -} - - -template -void MultiVector::move_to(matrix::Dense* result) -{ - this->convert_to(result); -} - - #define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class MultiVector<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR); diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index c37c0cae721..7204c78a552 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -165,12 +165,8 @@ std::vector> write( /** * Creates and initializes a batch of single column-vectors. * - * This function first creates a temporary MultiVector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) + * @tparam Matrix matrix type to initialize (It has to implement the + * read function) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * @@ -180,7 +176,6 @@ std::vector> write( * including the Executor, which is passed as the first * argument * - * @ingroup MultiVector * @ingroup mat_formats */ template @@ -220,23 +215,19 @@ std::unique_ptr initialize( /** - * Creates and initializes a batch of multi-vectors. - * - * This function first creates a temporary MultiVector, fills it with - * passed in values, and then converts the vector to the requested type. + * Creates and initializes a batch of matrices. * - * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo interface) + * @tparam Matrix matrix type to initialize (It has to implement the + * read function) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param vals values used to initialize the vector - * @param exec Executor associated to the vector + * @param vals values used to initialize the matrix + * @param exec Executor associated with the matrix * @param create_args additional arguments passed to Matrix::create, not * including the Executor, which is passed as the first * argument * - * @ingroup MultiVector * @ingroup mat_formats */ template @@ -290,23 +281,18 @@ std::unique_ptr initialize( * Creates and initializes a batch single column-vector by making copies of the * single input column vector. * - * This function first creates a temporary batch multi-vector, fills it with - * passed in values, and then converts the vector to the requested type. 
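Stripped of the CUDA/HIP thread and block indexing, the per-item apply in these kernels reduces to the following sequential sketch. It is a restatement for clarity, not code from the patch, and the function and parameter names are invented.

    // Sequential sketch of x = A * b for one batch item in ELL storage, where
    // stride == num_rows and a column index equal to invalid_index<IndexType>()
    // marks a padded slot.
    template <typename ValueType, typename IndexType>
    void ell_item_spmv(const ValueType* values, const IndexType* col_idxs,
                       int num_rows, int stride, int num_stored_elems_per_row,
                       const ValueType* b, ValueType* x)
    {
        for (int row = 0; row < num_rows; row++) {
            auto temp = ValueType{};
            for (int k = 0; k < num_stored_elems_per_row; k++) {
                const auto ind = row + k * stride;
                const auto col = col_idxs[ind];
                if (col == gko::invalid_index<IndexType>()) {
                    break;  // the remaining slots of this row are padding
                }
                temp += values[ind] * b[col];
            }
            x[row] = temp;
        }
    }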
- * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) + * @tparam Matrix matrix type to initialize (It has to implement the + * read function) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * * @param num_vectors The number of times the input vector is to be duplicated * @param vals values used to initialize each vector in the temp. batch - * @param exec Executor associated to the vector + * @param exec Executor associated with the matrix * @param create_args additional arguments passed to Matrix::create, not * including the Executor, which is passed as the first * argument * - * @ingroup MultiVector * @ingroup mat_formats */ template @@ -343,23 +329,18 @@ std::unique_ptr initialize( /** * Creates and initializes a matrix from copies of a given matrix. * - * This function first creates a temporary batch multi-vector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) + * @tparam Matrix matrix type to initialize (It has to implement the + * read function) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * * @param num_batch_items The number of times the input matrix is duplicated - * @param vals values used to initialize each vector in the temp. batch - * @param exec Executor associated to the vector + * @param vals values used to initialize each matrix in the temp. batch + * @param exec Executor associated to the matrix * @param create_args additional arguments passed to Matrix::create, not * including the Executor, which is passed as the first * argument * - * @ingroup LinOp * @ingroup mat_formats */ template diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index eeeeebd53d6..f208f5ff078 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -89,10 +89,10 @@ namespace ell { /** * Encapsulates one matrix from a batch of ell matrices. */ -template +template struct batch_item { using value_type = ValueType; - using index_type = int32; + using index_type = IndexType; ValueType* values; const index_type* col_idxs; @@ -106,11 +106,11 @@ struct batch_item { /** * A 'simple' structure to store a global uniform batch of ell matrices. 
*/ -template +template struct uniform_batch { using value_type = ValueType; - using index_type = int32; - using entry_type = batch_item; + using index_type = IndexType; + using entry_type = batch_item; ValueType* values; const index_type* col_idxs; @@ -164,27 +164,28 @@ GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( } -template -GKO_ATTRIBUTES GKO_INLINE ell::batch_item to_const( - const ell::batch_item& b) +template +GKO_ATTRIBUTES GKO_INLINE ell::batch_item to_const( + const ell::batch_item& b) { return {b.values, b.col_idxs, b.stride, b.num_rows, b.num_cols, b.num_stored_elems_per_row}; } -template -GKO_ATTRIBUTES GKO_INLINE ell::uniform_batch to_const( - const ell::uniform_batch& ub) +template +GKO_ATTRIBUTES GKO_INLINE ell::uniform_batch +to_const(const ell::uniform_batch& ub) { return {ub.values, ub.col_idxs, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_cols, ub.num_stored_elems_per_row}; } -template -GKO_ATTRIBUTES GKO_INLINE ell::batch_item extract_batch_item( - const ell::uniform_batch& batch, const size_type batch_idx) +template +GKO_ATTRIBUTES GKO_INLINE ell::batch_item +extract_batch_item(const ell::uniform_batch& batch, + const size_type batch_idx) { return {batch.values + batch_idx * batch.num_stored_elems_per_row * batch.num_rows, @@ -195,11 +196,12 @@ GKO_ATTRIBUTES GKO_INLINE ell::batch_item extract_batch_item( batch.num_stored_elems_per_row}; } -template -GKO_ATTRIBUTES GKO_INLINE ell::batch_item extract_batch_item( - ValueType* const batch_values, int* const batch_col_idxs, const int stride, - const int num_rows, const int num_cols, int num_elems_per_row, - const size_type batch_idx) +template +GKO_ATTRIBUTES GKO_INLINE ell::batch_item +extract_batch_item(ValueType* const batch_values, + IndexType* const batch_col_idxs, const int stride, + const int num_rows, const int num_cols, + int num_elems_per_row, const size_type batch_idx) { return {batch_values + batch_idx * num_elems_per_row * num_rows, batch_col_idxs, diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp index e4dcab23917..c36a877ac14 100644 --- a/core/test/matrix/batch_ell.cpp +++ b/core/test/matrix/batch_ell.cpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
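Since col_idxs is shared by all batch items, extract_batch_item only advances the value pointer. The small host-side helper below spells out the same offset arithmetic for a single value lookup (batch item b, row r, stored-element slot k); the helper name and signature are invented for illustration.

    // values[ell_value_offset(b, r, k, ...)] pairs with col_idxs[r + k * stride];
    // the column lookup is independent of the batch index.
    inline gko::size_type ell_value_offset(gko::size_type batch_idx,
                                           gko::size_type row,
                                           gko::size_type slot,
                                           gko::size_type num_rows,
                                           gko::size_type num_stored_elems_per_row,
                                           gko::size_type stride)
    {
        const auto item_offset = batch_idx * num_stored_elems_per_row * num_rows;
        return item_offset + row + slot * stride;
    }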
#include #include -#include #include @@ -52,26 +51,26 @@ class Ell : public ::testing::Test { protected: using value_type = T; using index_type = gko::int32; - using EllMtx = gko::matrix::Ell; + using BatchEllMtx = gko::batch::matrix::Ell; + using EllMtx = gko::matrix::Ell; using size_type = gko::size_type; Ell() : exec(gko::ReferenceExecutor::create()), - mtx(gko::batch::initialize>( + mtx(gko::batch::initialize( {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, exec, 3)), - sp_mtx(gko::batch::initialize>( + sp_mtx(gko::batch::initialize( {{{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}}, exec, 2)), - ell_mtx(gko::initialize>( - {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec, gko::dim<2>(2, 3), 3)), - sp_ell_mtx(gko::initialize>( - {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, exec, gko::dim<2>(2, 3), 2)) + ell_mtx(gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + exec, gko::dim<2>(2, 3), 3)), + sp_ell_mtx(gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, + exec, gko::dim<2>(2, 3), 2)) {} - static void assert_equal_to_original_sparse_mtx( - const gko::batch::matrix::Ell* m) + static void assert_equal_to_original_sparse_mtx(const BatchEllMtx* m) { ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); @@ -91,8 +90,7 @@ class Ell : public ::testing::Test { ASSERT_EQ(m->get_const_col_idxs()[3], index_type{2}); } - static void assert_equal_to_original_mtx( - const gko::batch::matrix::Ell* m) + static void assert_equal_to_original_mtx(const BatchEllMtx* m) { ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); @@ -112,7 +110,7 @@ class Ell : public ::testing::Test { ASSERT_EQ(m->get_const_values()[11], value_type{3.0}); } - static void assert_empty(gko::batch::matrix::Ell* m) + static void assert_empty(BatchEllMtx* m) { ASSERT_EQ(m->get_num_batch_items(), 0); ASSERT_EQ(m->get_num_stored_elements(), 0); @@ -120,10 +118,10 @@ class Ell : public ::testing::Test { } std::shared_ptr exec; - std::unique_ptr> mtx; - std::unique_ptr> sp_mtx; - std::unique_ptr> ell_mtx; - std::unique_ptr> sp_ell_mtx; + std::unique_ptr mtx; + std::unique_ptr sp_mtx; + std::unique_ptr ell_mtx; + std::unique_ptr sp_ell_mtx; }; TYPED_TEST_SUITE(Ell, gko::test::ValueTypes); @@ -143,16 +141,11 @@ TYPED_TEST(Ell, SparseMtxKnowsItsSizeAndValues) TYPED_TEST(Ell, CanBeEmpty) { - auto empty = gko::batch::matrix::Ell::create(this->exec); + using BatchEllMtx = typename TestFixture::BatchEllMtx; - this->assert_empty(empty.get()); -} - - -TYPED_TEST(Ell, ReturnsNullValuesArrayWhenEmpty) -{ - auto empty = gko::batch::matrix::Ell::create(this->exec); + auto empty = BatchEllMtx::create(this->exec); + this->assert_empty(empty.get()); ASSERT_EQ(empty->get_const_values(), nullptr); } @@ -180,7 +173,9 @@ TYPED_TEST(Ell, CanCreateSpEllItemView) TYPED_TEST(Ell, CanBeCopied) { - auto mtx_copy = gko::batch::matrix::Ell::create(this->exec); + using BatchEllMtx = typename TestFixture::BatchEllMtx; + + auto mtx_copy = BatchEllMtx::create(this->exec); mtx_copy->copy_from(this->mtx.get()); @@ -192,7 +187,9 @@ TYPED_TEST(Ell, CanBeCopied) TYPED_TEST(Ell, CanBeMoved) { - auto mtx_copy = gko::batch::matrix::Ell::create(this->exec); + using BatchEllMtx = typename TestFixture::BatchEllMtx; + + auto mtx_copy = BatchEllMtx::create(this->exec); this->mtx->move_to(mtx_copy); @@ -219,10 +216,10 @@ TYPED_TEST(Ell, CanBeCleared) TYPED_TEST(Ell, CanBeConstructedWithSize) { - using size_type = gko::size_type; + using BatchEllMtx = typename 
TestFixture::BatchEllMtx; - auto m = gko::batch::matrix::Ell::create( - this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3}), 2); + auto m = BatchEllMtx::create(this->exec, + gko::batch_dim<2>(2, gko::dim<2>{5, 3}), 2); ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); @@ -235,19 +232,19 @@ TYPED_TEST(Ell, CanBeConstructedFromExistingData) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using size_type = gko::size_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; // clang-format off value_type values[] = { -1.0, 2.5, - 0.0, 3.5, - 1.0, 2.0, - 0.0, 3.0}; + 0.0, 3.5, + 1.0, 2.0, + 0.0, 3.0}; index_type col_idxs[] = { - 0, 1, + 0, 1, -1, 2}; // clang-format on - auto m = gko::batch::matrix::Ell::create( + auto m = BatchEllMtx::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2, gko::array::view(this->exec, 8, values), gko::array::view(this->exec, 4, col_idxs)); @@ -260,19 +257,19 @@ TYPED_TEST(Ell, CanBeConstructedFromExistingConstData) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using size_type = gko::size_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; // clang-format off value_type values[] = { -1.0, 2.5, - 0.0, 3.5, - 1.0, 2.0, - 0.0, 3.0}; + 0.0, 3.5, + 1.0, 2.0, + 0.0, 3.0}; index_type col_idxs[] = { - 0, 1, + 0, 1, -1, 2}; // clang-format on - auto m = gko::batch::matrix::Ell::create_const( + auto m = BatchEllMtx::create_const( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2, gko::array::const_view(this->exec, 8, values), gko::array::const_view(this->exec, 4, col_idxs)); @@ -283,15 +280,14 @@ TYPED_TEST(Ell, CanBeConstructedFromExistingConstData) TYPED_TEST(Ell, CanBeConstructedFromEllMatrices) { - using value_type = typename TestFixture::value_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using EllMtx = typename TestFixture::EllMtx; - using size_type = gko::size_type; auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::create_from_item>( + auto m = gko::batch::create_from_item( this->exec, std::vector{mat1.get(), mat2.get()}, mat1->get_num_stored_elements_per_row()); @@ -301,19 +297,15 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatrices) TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) { - using value_type = typename TestFixture::value_type; - using index_type = int; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using EllMtx = typename TestFixture::EllMtx; - using size_type = gko::size_type; auto mat1 = gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); - auto bat_m = - gko::batch::create_from_item>( - this->exec, - std::vector{mat1.get(), mat1.get(), mat1.get()}, - mat1->get_num_stored_elements_per_row()); + auto bat_m = gko::batch::create_from_item( + this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}, + mat1->get_num_stored_elements_per_row()); - auto m = gko::batch::create_from_item>( + auto m = gko::batch::create_from_item( this->exec, 3, mat1.get(), mat1->get_num_stored_elements_per_row()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); @@ -322,26 +314,23 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) { - using value_type = typename TestFixture::value_type; - using index_type = int; + using 
BatchEllMtx = typename TestFixture::BatchEllMtx; using EllMtx = typename TestFixture::EllMtx; - using size_type = gko::size_type; auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 0.0}}, this->exec); auto mat2 = gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); - auto m = gko::batch::create_from_item>( + auto m = gko::batch::create_from_item( this->exec, std::vector{mat1.get(), mat2.get()}, mat1->get_num_stored_elements_per_row()); - auto m_ref = - gko::batch::create_from_item>( - this->exec, - std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), - mat1.get(), mat2.get()}, - mat1->get_num_stored_elements_per_row()); - - auto m2 = gko::batch::duplicate>( + auto m_ref = gko::batch::create_from_item( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), + mat1.get(), mat2.get()}, + mat1->get_num_stored_elements_per_row()); + + auto m2 = gko::batch::duplicate( this->exec, 3, m.get(), mat1->get_num_stored_elements_per_row()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); @@ -350,17 +339,14 @@ TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices) { - using value_type = typename TestFixture::value_type; - using index_type = int; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using EllMtx = typename TestFixture::EllMtx; - using size_type = gko::size_type; auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, this->exec); - auto ell_mats = gko::batch::unbatch>( - this->sp_mtx.get()); + auto ell_mats = gko::batch::unbatch(this->sp_mtx.get()); GKO_ASSERT_MTX_NEAR(ell_mats[0].get(), mat1.get(), 0.); GKO_ASSERT_MTX_NEAR(ell_mats[1].get(), mat2.get(), 0.); @@ -370,10 +356,12 @@ TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices) TYPED_TEST(Ell, CanBeListConstructed) { using value_type = typename TestFixture::value_type; - using index_type = int; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; + using EllMtx = typename TestFixture::EllMtx; - auto m = gko::batch::initialize>( - {{0.0, -1.0}, {1.0, 0.0}}, this->exec); + auto m = gko::batch::initialize({{0.0, -1.0}, {1.0, 0.0}}, + this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); @@ -391,10 +379,11 @@ TYPED_TEST(Ell, CanBeListConstructed) TYPED_TEST(Ell, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; - using index_type = int; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; - auto m = gko::batch::initialize>( - 2, I({0.0, -1.0}), this->exec, 1); + auto m = gko::batch::initialize(2, I({0.0, -1.0}), + this->exec, 1); ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); @@ -412,10 +401,11 @@ TYPED_TEST(Ell, CanBeListConstructedByCopies) TYPED_TEST(Ell, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; - using index_type = int; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using T = value_type; - auto m = gko::batch::initialize>( + auto m = gko::batch::initialize( // clang-format off {{I{1.0, 0.0, 0.0}, I{2.0, 0.0, 3.0}, @@ -454,15 +444,15 @@ TYPED_TEST(Ell, CanBeDoubleListConstructed) TYPED_TEST(Ell, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; - using index_type = int; + 
using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; auto vec_data = std::vector>{}; vec_data.emplace_back(gko::matrix_data( {2, 3}, {{0, 0, -1.0}, {1, 1, 2.5}, {1, 2, 3.5}})); vec_data.emplace_back(gko::matrix_data( {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}})); - auto m = gko::batch::read>(this->exec, + auto m = gko::batch::read(this->exec, vec_data, 2); this->assert_equal_to_original_sparse_mtx(m.get()); @@ -472,11 +462,11 @@ TYPED_TEST(Ell, CanBeReadFromMatrixData) TYPED_TEST(Ell, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; - using index_type = int; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using tpl = typename gko::matrix_data::nonzero_type; - auto data = gko::batch::write>( + auto data = gko::batch::write( this->sp_mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index 8a82ae744e7..7490a24bbe5 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -232,7 +232,7 @@ std::unique_ptr fill_random_matrix_with_sparsity_pattern( using index_type = IndexType; GKO_ASSERT(row_idxs.get_num_elems() == col_idxs.get_num_elems()); - GKO_ASSERT(row_idxs.get_num_elems() < (num_rows * num_cols)); + GKO_ASSERT(row_idxs.get_num_elems() <= (num_rows * num_cols)); auto result = MatrixType::create(exec, std::forward(args)...); result->read(fill_random_matrix_data( num_rows, num_cols, row_idxs, col_idxs, diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index e2db1ea6e97..4a2a1835961 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -91,34 +91,34 @@ get_batch_struct(batch::matrix::Dense* const op) /** * Generates an immutable uniform batch struct from a batch of ell matrices. */ -template -inline batch::matrix::ell::uniform_batch> -get_batch_struct(const batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(const batch::matrix::Ell* const op) { return {as_cuda_type(op->get_const_values()), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } /** * Generates a uniform batch struct from a batch of ell matrices. 
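The batch construction paths exercised by the tests above can be summarized in a small standalone sketch. It is illustrative only: the executor, the double value type and the spelled-out Ell template arguments are assumptions made for this example, and all items handed to the batch utilities must share one sparsity pattern.

#include <ginkgo/ginkgo.hpp>

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    using EllBatch = gko::batch::matrix::Ell<double, gko::int32>;

    // Two 2x3 items with identical sparsity; the trailing 3 is forwarded to
    // Ell::create as the number of stored elements per row.
    auto batch = gko::batch::initialize<EllBatch>(
        {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}},
         {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}},
        exec, 3);

    // Split the batch back into independent gko::matrix::Ell items.
    auto items = gko::batch::unbatch<EllBatch>(batch.get());
    return items.size() == 2 ? 0 : 1;
}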
*/ -template -inline batch::matrix::ell::uniform_batch> get_batch_struct( - batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(batch::matrix::Ell* const op) { return {as_cuda_type(op->get_values()), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index fca265eceb0..e4d2421a42f 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -81,6 +81,7 @@ void simple_apply(std::shared_ptr exec, const auto num_batch_items = mat->get_num_batch_items(); auto device = exec->get_queue()->get_device(); + // TODO: use runtime selection of group size based on num_rows. auto group_size = device.get_info(); @@ -134,6 +135,7 @@ void advanced_apply(std::shared_ptr exec, const auto num_batch_items = mat_ub.num_batch_items; auto device = exec->get_queue()->get_device(); + // TODO: use runtime selection of group size based on num_rows. auto group_size = device.get_info(); diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc index e6501bafaba..553e0aa1f3c 100644 --- a/dpcpp/matrix/batch_ell_kernels.hpp.inc +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -30,9 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -template +template __dpct_inline__ void simple_apply_kernel( - const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& x, sycl::nd_item<3>& item_ct1) @@ -42,37 +42,38 @@ __dpct_inline__ void simple_apply_kernel( auto temp = zero(); for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx == invalid_index()) { + if (col_idx != invalid_index()) { break; - else temp += mat.values[tidx + idx * mat.stride] * - b.values[col_idx * b.stride]; - } - x.values[tidx * x.stride] = temp; + } else + temp += mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; } + x.values[tidx * x.stride] = temp; } +} - template - __dpct_inline__ void advanced_apply_kernel( - const gko::batch::multi_vector::batch_item& alpha, - const gko::batch::matrix::ell::batch_item& mat, - const gko::batch::multi_vector::batch_item& b, - const gko::batch::multi_vector::batch_item& beta, - const gko::batch::multi_vector::batch_item& x, - sycl::nd_item<3>& item_ct1) - { - for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; - tidx += item_ct1.get_local_range().size()) { - auto temp = zero(); - for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { - const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx == invalid_index()) { - break; - else temp += alpha.values[0] * - mat.values[tidx + idx * mat.stride] * - b.values[col_idx * b.stride]; - } - x.values[tidx * x.stride] = - temp + beta.values[0] * x.values[tidx * x.stride]; - } +template +__dpct_inline__ void advanced_apply_kernel( + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& beta, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) +{ + for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; + tidx += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { + const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; + if (col_idx != invalid_index()) { + break; + } else + temp += alpha.values[0] * mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; } + x.values[tidx * x.stride] = + temp + beta.values[0] * x.values[tidx * x.stride]; + } +} diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index f857653e05e..fe04407d82d 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -90,34 +90,34 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( /** * Generates an immutable uniform batch struct from a batch of ell matrices. 
*/ -template -inline batch::matrix::ell::uniform_batch get_batch_struct( - const batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch +get_batch_struct(const batch::matrix::Ell* const op) { return {op->get_const_values(), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } /** * Generates a uniform batch struct from a batch of ell matrices. */ -template -inline batch::matrix::ell::uniform_batch get_batch_struct( - batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch get_batch_struct( + batch::matrix::Ell* const op) { return {op->get_values(), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index 6f15b2d966a..e35f13f1249 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -91,34 +91,34 @@ get_batch_struct(batch::matrix::Dense* const op) /** * Generates an immutable uniform batch struct from a batch of ell matrices. */ -template -inline batch::matrix::ell::uniform_batch> -get_batch_struct(const batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(const batch::matrix::Ell* const op) { return {as_hip_type(op->get_const_values()), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } /** * Generates a uniform batch struct from a batch of ell matrices. */ -template -inline batch::matrix::ell::uniform_batch> get_batch_struct( - batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(batch::matrix::Ell* const op) { return {as_hip_type(op->get_values()), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 9a4b8d5cf1d..405603269ff 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -52,14 +52,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
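For orientation, the view that these get_batch_struct overloads assemble is a plain set of raw pointers and sizes. Below is a rough sketch of such a view; only the names that also appear in the kernels of this series (values, col_idxs, stride, num_rows, num_stored_elems_per_row) are taken from the patch, while the remaining names and the exact member order are made up for illustration.

#include <cstddef>

// Illustrative only: a flat, non-owning view over a batch of ELL matrices.
template <typename ValueType, typename IndexType>
struct ell_batch_view {
    ValueType* values;          // one block of num_rows * num_stored_elems_per_row
                                // entries per batch item, zero-padded
    const IndexType* col_idxs;  // shared by all items, since every item has the
                                // same sparsity pattern; padded slots hold -1
    std::size_t num_batch_items;
    IndexType stride;           // leading dimension of a block; the overloads pass
                                // common_size()[0] here, so it equals num_rows
    IndexType num_rows;
    IndexType num_cols;
    IndexType num_stored_elems_per_row;
};

// A per-item view then only needs to offset the values pointer, e.g.
//   item_values = values + item_id * num_rows * num_stored_elems_per_row;
// while col_idxs is reused unchanged for every item.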
namespace gko { namespace batch { -namespace matrix { - - -template -class Dense; - - -} /** @@ -90,21 +82,17 @@ class MultiVector : public EnablePolymorphicObject>, public EnablePolymorphicAssignment>, public EnableCreateMethod>, - public ConvertibleTo>>, - public ConvertibleTo> { + public ConvertibleTo>> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class MultiVector>; friend class MultiVector>; - friend class matrix::Dense; public: using EnablePolymorphicAssignment::convert_to; using EnablePolymorphicAssignment::move_to; using ConvertibleTo>>::convert_to; using ConvertibleTo>>::move_to; - using ConvertibleTo>::convert_to; - using ConvertibleTo>::move_to; using value_type = ValueType; using index_type = int32; @@ -126,10 +114,6 @@ class MultiVector void move_to(MultiVector>* result) override; - void convert_to(matrix::Dense* result) const override; - - void move_to(matrix::Dense* result) override; - /** * Creates a mutable view (of matrix::Dense type) of one item of the Batch * MultiVector object. Does not perform any deep copies, but only returns a diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 7f3ce5890e4..cbec04482a3 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -306,7 +306,6 @@ class Dense final : public EnableBatchLinOp>, size.get_common_size()[1]; } -protected: /** * Creates an uninitialized Dense matrix of the specified size. * @@ -362,7 +361,6 @@ class Dense final : public EnableBatchLinOp>, idx % this->get_common_size()[1]); } -private: array values_; }; diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index be49e2cff41..943f63bfdd7 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -67,6 +67,8 @@ namespace matrix { * batch is the same and therefore only a single copy of the sparsity pattern is * stored. * + * @note Currently only IndexType of int32 is supported. + * * @tparam ValueType value precision of matrix elements * @tparam IndexType index precision of matrix elements * @@ -83,6 +85,8 @@ class Ell final friend class EnablePolymorphicObject; friend class Ell, IndexType>; friend class Ell, IndexType>; + static_assert(std::is_same::value, + "IndexType must be a 32 bit integer"); public: using EnableBatchLinOp::convert_to; @@ -315,8 +319,6 @@ class Ell final num_elems_per_row; } - -protected: /** * Creates an uninitialized Ell matrix of the specified size. * @@ -369,7 +371,6 @@ class Ell final const MultiVector* beta, MultiVector* x) const; -private: index_type num_elems_per_row_; array values_; array col_idxs_; diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc index 44de2a57af9..979df1a19bd 100644 --- a/reference/matrix/batch_ell_kernels.hpp.inc +++ b/reference/matrix/batch_ell_kernels.hpp.inc @@ -30,9 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -template +template inline void simple_apply_kernel( - const gko::batch::matrix::ell::batch_item& a, + const gko::batch::matrix::ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& c) { @@ -43,19 +43,21 @@ inline void simple_apply_kernel( for (auto k = 0; k < a.num_stored_elems_per_row; ++k) { auto val = a.values[row + k * a.stride]; auto col = a.col_idxs[row + k * a.stride]; - for (int j = 0; j < c.num_rhs; ++j) { - c.values[row * c.stride + j] += - val * b.values[col * b.stride + j]; + if (col != invalid_index()) { + for (int j = 0; j < c.num_rhs; ++j) { + c.values[row * c.stride + j] += + val * b.values[col * b.stride + j]; + } } } } } -template +template inline void advanced_apply_kernel( const ValueType alpha, - const gko::batch::matrix::ell::batch_item& a, + const gko::batch::matrix::ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const ValueType beta, const gko::batch::multi_vector::batch_item& c) @@ -67,9 +69,11 @@ inline void advanced_apply_kernel( for (auto k = 0; k < a.num_stored_elems_per_row; ++k) { auto val = a.values[row + k * a.stride]; auto col = a.col_idxs[row + k * a.stride]; - for (int j = 0; j < b.num_rhs; ++j) { - c.values[row * c.stride + j] += - alpha * val * b.values[col * b.stride + j]; + if (col != invalid_index()) { + for (int j = 0; j < b.num_rhs; ++j) { + c.values[row * c.stride + j] += + alpha * val * b.values[col * b.stride + j]; + } } } } diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index fb0e08c16f5..bb7680d1493 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -94,34 +94,34 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( /** * Generates an immutable uniform batch struct from a batch of ell matrices. */ -template -inline batch::matrix::ell::uniform_batch get_batch_struct( - const batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch +get_batch_struct(const batch::matrix::Ell* const op) { return {op->get_const_values(), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } /** * Generates a uniform batch struct from a batch of ell matrices. 
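The reference kernels above skip every stored slot whose column index is the invalid sentinel. As a self-contained illustration of that access pattern, here is a plain single-item, single right-hand-side version written for this description; it is not the library kernel, and the sentinel value -1 matches the padding used in the core tests.

#include <cstdint>
#include <vector>

// y = A * x for one ELL item: values and col_idxs use leading dimension
// num_rows, i.e. stored entry k of row r sits at index r + k * num_rows.
std::vector<double> ell_spmv(const std::vector<double>& values,
                             const std::vector<std::int32_t>& col_idxs,
                             int num_rows, int num_stored_elems_per_row,
                             const std::vector<double>& x)
{
    const std::int32_t invalid = -1;  // padding marker for short rows
    std::vector<double> y(num_rows, 0.0);
    for (int row = 0; row < num_rows; ++row) {
        for (int k = 0; k < num_stored_elems_per_row; ++k) {
            const auto col = col_idxs[row + k * num_rows];
            if (col == invalid) {
                continue;  // padded slot, contributes nothing
            }
            y[row] += values[row + k * num_rows] * x[col];
        }
    }
    return y;
}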
*/ -template -inline batch::matrix::ell::uniform_batch get_batch_struct( - batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch get_batch_struct( + batch::matrix::Ell* const op) { return {op->get_values(), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/reference/test/matrix/batch_ell_kernels.cpp b/reference/test/matrix/batch_ell_kernels.cpp index 8a5806a9513..81f189c3e02 100644 --- a/reference/test/matrix/batch_ell_kernels.cpp +++ b/reference/test/matrix/batch_ell_kernels.cpp @@ -123,8 +123,8 @@ TYPED_TEST(Ell, AppliesToBatchMultiVector) this->mtx_00->apply(this->b_00.get(), this->x_00.get()); this->mtx_01->apply(this->b_01.get(), this->x_01.get()); auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r::value); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r::value); } @@ -149,8 +149,8 @@ TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), this->x_01.get()); auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r::value); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r::value); } From 0532b2b30ddec5546062da327983bb41ab2706e2 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 12 Oct 2023 12:05:08 +0200 Subject: [PATCH 395/583] Add apply temp clone, review updates Co-authored-by: Tobias Ribizel --- core/matrix/batch_dense.cpp | 64 +++++++++++++++++++++- core/matrix/batch_ell.cpp | 62 +++++++++++++++++++++ dpcpp/matrix/batch_ell_kernels.hpp.inc | 4 +- include/ginkgo/core/matrix/batch_dense.hpp | 38 ++++++++----- include/ginkgo/core/matrix/batch_ell.hpp | 55 +++++++++++-------- 5 files changed, 182 insertions(+), 41 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 758635cea7f..8390d43fd7d 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -124,11 +124,72 @@ Dense::Dense(std::shared_ptr exec, {} +template +Dense* Dense::apply( + ptr_param> b, + ptr_param> x) +{ + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +const Dense* Dense::apply( + ptr_param> b, + ptr_param> x) const +{ + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +Dense* Dense::apply( + ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) +{ + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, 
beta).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +const Dense* Dense::apply( + ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const +{ + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + template void Dense::apply_impl(const MultiVector* b, MultiVector* x) const { - this->validate_application_parameters(b, x); this->get_executor()->run(dense::make_simple_apply(this, b, x)); } @@ -139,7 +200,6 @@ void Dense::apply_impl(const MultiVector* alpha, const MultiVector* beta, MultiVector* x) const { - this->validate_application_parameters(alpha, b, beta, x); this->get_executor()->run( dense::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index c9dbe6d51c9..a50b2f3e23a 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -140,6 +140,68 @@ Ell::Ell(std::shared_ptr exec, {} +template +Ell* Ell::apply( + ptr_param> b, + ptr_param> x) +{ + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +const Ell* Ell::apply( + ptr_param> b, + ptr_param> x) const +{ + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +Ell* Ell::apply( + ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) +{ + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +const Ell* Ell::apply( + ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const +{ + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + template void Ell::apply_impl(const MultiVector* b, MultiVector* x) const diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc index 553e0aa1f3c..8cdb8daa273 100644 --- a/dpcpp/matrix/batch_ell_kernels.hpp.inc +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -42,7 +42,7 @@ __dpct_inline__ void simple_apply_kernel( auto temp = zero(); for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx != invalid_index()) { + if (col_idx == invalid_index()) { break; } else temp += mat.values[tidx + idx * mat.stride] * @@ -67,7 +67,7 @@ __dpct_inline__ void advanced_apply_kernel( auto temp = zero(); for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx != invalid_index()) { + if (col_idx == invalid_index()) { break; } 
else temp += alpha.values[0] * mat.values[tidx + idx * mat.stride] * diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index cbec04482a3..07b862ef484 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -233,8 +233,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); @@ -275,11 +275,8 @@ class Dense final : public EnableBatchLinOp>, * @param b the multi-vector to be applied to * @param x the output multi-vector */ - void apply(const MultiVector* b, - MultiVector* x) const - { - this->apply_impl(b, x); - } + Dense* apply(ptr_param> b, + ptr_param> x); /** * Apply the matrix to a multi-vector with a linear combination of the given @@ -291,13 +288,26 @@ class Dense final : public EnableBatchLinOp>, * @param beta the scalar to scale the x vector with * @param x the output multi-vector */ - void apply(const MultiVector* alpha, - const MultiVector* b, - const MultiVector* beta, - MultiVector* x) const - { - this->apply_impl(alpha, b, beta, x); - } + Dense* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x); + + /** + * @copydoc apply(const MultiVector*, MultiVector*) + */ + const Dense* apply(ptr_param> b, + ptr_param> x) const; + + /** + * @copydoc apply(const MultiVector*, const + * MultiVector*, const MultiVector*, + * MultiVector*) + */ + const Dense* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const; private: inline size_type compute_num_elems(const batch_dim<2>& size) diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 943f63bfdd7..5be94f1035e 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -85,7 +85,7 @@ class Ell final friend class EnablePolymorphicObject; friend class Ell, IndexType>; friend class Ell, IndexType>; - static_assert(std::is_same::value, + static_assert(std::is_same::value, "IndexType must be a 32 bit integer"); public: @@ -94,8 +94,7 @@ class Ell final using value_type = ValueType; using index_type = IndexType; - using transposed_type = Ell; - using unbatch_type = gko::matrix::Ell; + using unbatch_type = gko::matrix::Ell; using absolute_type = remove_complex; using complex_type = to_complex; @@ -223,8 +222,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type* get_const_col_idxs_for_item( - size_type batch_id) const noexcept + const index_type* get_const_col_idxs_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data(); @@ -252,8 +251,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. 
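A short caller-side sketch of the reworked apply interface; it assumes the batch Ell and MultiVector types from this series and operands of compatible sizes. Because the arguments are taken as ptr_param and wrapped in temporary clones internally, raw or smart pointers both work and the operands may live on a different executor than the matrix.

#include <ginkgo/ginkgo.hpp>

// x = A * b for every item of the batch; when the executors differ, x is
// written back through the temporary clone on return.
template <typename ValueType>
void apply_batched(const gko::batch::matrix::Ell<ValueType, gko::int32>* mat,
                   const gko::batch::MultiVector<ValueType>* b,
                   gko::batch::MultiVector<ValueType>* x)
{
    mat->apply(b, x);
    // The four-argument overload computes x = alpha * A * b + beta * x:
    // mat->apply(alpha, b, beta, x);
}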
*/ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + @@ -277,8 +276,8 @@ class Ell final static std::unique_ptr create_const( std::shared_ptr exec, const batch_dim<2>& sizes, const index_type num_elems_per_row, - gko::detail::const_array_view&& values, - gko::detail::const_array_view&& col_idxs); + gko::detail::const_array_view&& values, + gko::detail::const_array_view&& col_idxs); /** * Apply the matrix to a multi-vector. Represents the matrix vector @@ -287,29 +286,39 @@ class Ell final * @param b the multi-vector to be applied to * @param x the output multi-vector */ - void apply(const MultiVector* b, - MultiVector* x) const - { - this->apply_impl(b, x); - } + Ell* apply(ptr_param> b, + ptr_param> x); /** * Apply the matrix to a multi-vector with a linear combination of the given - * input vector. Represents the matrix vector multiplication, x = alpha* A * - * b + beta * x, where x and b are both multi-vectors. + * input vector. Represents the matrix vector multiplication, x = alpha * A + * * b + beta * x, where x and b are both multi-vectors. * * @param alpha the scalar to scale the matrix-vector product with * @param b the multi-vector to be applied to * @param beta the scalar to scale the x vector with * @param x the output multi-vector */ - void apply(const MultiVector* alpha, - const MultiVector* b, - const MultiVector* beta, - MultiVector* x) const - { - this->apply_impl(alpha, b, beta, x); - } + Ell* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x); + + /** + * @copydoc apply(const MultiVector*, MultiVector*) + */ + const Ell* apply(ptr_param> b, + ptr_param> x) const; + + /** + * @copydoc apply(const MultiVector*, const + * MultiVector*, const MultiVector*, + * MultiVector*) + */ + const Ell* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const; private: size_type compute_num_elems(const batch_dim<2>& size, From ca459356009f5556493e39c0073ebf8a2ab60bab Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Thu, 12 Oct 2023 10:56:01 +0000 Subject: [PATCH 396/583] Format files Co-authored-by: Pratik Nayak --- dpcpp/matrix/batch_ell_kernels.dp.cpp | 54 +++++++++++----------- include/ginkgo/core/matrix/batch_dense.hpp | 4 +- include/ginkgo/core/matrix/batch_ell.hpp | 8 ++-- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index e4d2421a42f..5a69bbd3d5d 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -97,17 +97,17 @@ void simple_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = 
item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -145,22 +145,22 @@ void advanced_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 07b862ef484..0b2bcc49166 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -233,8 +233,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 5be94f1035e..a6381f90f10 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -222,8 +222,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type* get_const_col_idxs_for_item(size_type batch_id) const - noexcept + const index_type* get_const_col_idxs_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data(); @@ -251,8 +251,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + From a74b018fae4c15c04145dbe55e6d75d7215d542f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 12 Oct 2023 16:06:05 +0200 Subject: [PATCH 397/583] Fix sparsity issues and review updates Co-authored-by: Marcel Koch Co-authored-by: Yu-Hsiang Tsai --- core/base/batch_utilities.hpp | 55 ++++++++++++++++++++++---- core/matrix/batch_ell.cpp | 2 - core/test/matrix/batch_ell.cpp | 32 ++++++++++++--- core/test/utils/batch_helpers.hpp | 7 ++-- core/test/utils/matrix_generator.hpp | 9 +++-- dpcpp/matrix/batch_ell_kernels.hpp.inc | 10 +++-- 6 files changed, 89 insertions(+), 26 deletions(-) diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index 7204c78a552..3117b35d0f4 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -46,6 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include namespace gko { @@ -126,6 +128,36 @@ auto unbatch(const InputType* batch_object) } +namespace detail { + + +template +void assert_same_sparsity_in_batched_data( + const std::vector>& data) +{ + auto num_nnz = data[0].nonzeros.size(); + auto base_data = data[0]; + base_data.ensure_row_major_order(); + for (int b = 0; b < data.size(); ++b) { + if (data[b].nonzeros.size() != num_nnz) { + GKO_NOT_IMPLEMENTED; + } + auto temp_data = data[b]; + temp_data.ensure_row_major_order(); + for (int nnz = 0; nnz < num_nnz; ++nnz) { + if (temp_data.nonzeros[nnz].row != base_data.nonzeros[nnz].row || + temp_data.nonzeros[nnz].column != + base_data.nonzeros[nnz].column) { + GKO_NOT_IMPLEMENTED; + } + } + } +} + + +} // namespace detail + + template std::unique_ptr read( @@ -134,6 +166,12 @@ std::unique_ptr read( TArgs&&... create_args) { auto num_batch_items = data.size(); + // Throw if all the items in the batch dont have same sparsity. + if (!std::is_same>::value && + !std::is_same>::value) { + detail::assert_same_sparsity_in_batched_data(data); + } auto tmp = OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size), std::forward(create_args)...); @@ -163,7 +201,8 @@ std::vector> write( /** - * Creates and initializes a batch of single column-vectors. + * Creates and initializes a batch of the specified Matrix type with a single + * column-vector. * * @tparam Matrix matrix type to initialize (It has to implement the * read function) @@ -278,15 +317,16 @@ std::unique_ptr initialize( /** - * Creates and initializes a batch single column-vector by making copies of the - * single input column vector. + * Creates and initializes a batch of specified Matrix type with a single + * column-vector by making copies of the single input column vector. * * @tparam Matrix matrix type to initialize (It has to implement the * read function) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param num_vectors The number of times the input vector is to be duplicated + * @param num_batch_items The number of times the input vector is to be + * duplicated * @param vals values used to initialize each vector in the temp. 
batch * @param exec Executor associated with the matrix * @param create_args additional arguments passed to Matrix::create, not @@ -297,21 +337,20 @@ std::unique_ptr initialize( */ template std::unique_ptr initialize( - const size_type num_vectors, + const size_type num_batch_items, std::initializer_list vals, std::shared_ptr exec, TArgs&&... create_args) { using value_type = typename Matrix::value_type; using index_type = typename Matrix::index_type; using mat_data = gko::matrix_data; - size_type num_batch_items = num_vectors; GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, "Input data is empty"); auto num_rows = begin(vals) ? vals.size() : 0; auto common_size = dim<2>(num_rows, 1); auto b_size = batch_dim<2>(num_batch_items, common_size); std::vector input_mat_data(num_batch_items, common_size); - for (size_type batch = 0; batch < num_vectors; batch++) { + for (size_type batch = 0; batch < num_batch_items; batch++) { input_mat_data[batch].nonzeros.reserve(num_rows); size_type idx = 0; for (const auto& elem : vals) { @@ -334,7 +373,7 @@ std::unique_ptr initialize( * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param num_batch_items The number of times the input matrix is duplicated + * @param num_batch_items The number of times the input matrix is duplicated * @param vals values used to initialize each matrix in the temp. batch * @param exec Executor associated to the matrix * @param create_args additional arguments passed to Matrix::create, not diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index a50b2f3e23a..5626860e7ee 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -206,7 +206,6 @@ template void Ell::apply_impl(const MultiVector* b, MultiVector* x) const { - this->validate_application_parameters(b, x); this->get_executor()->run(ell::make_simple_apply(this, b, x)); } @@ -217,7 +216,6 @@ void Ell::apply_impl(const MultiVector* alpha, const MultiVector* beta, MultiVector* x) const { - this->validate_application_parameters(alpha, b, beta, x); this->get_executor()->run( ell::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp index c36a877ac14..e04ed96bf4c 100644 --- a/core/test/matrix/batch_ell.cpp +++ b/core/test/matrix/batch_ell.cpp @@ -360,7 +360,7 @@ TYPED_TEST(Ell, CanBeListConstructed) using BatchEllMtx = typename TestFixture::BatchEllMtx; using EllMtx = typename TestFixture::EllMtx; - auto m = gko::batch::initialize({{0.0, -1.0}, {1.0, 0.0}}, + auto m = gko::batch::initialize({{0.0, -1.0}, {0.0, -5.0}}, this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -369,10 +369,10 @@ TYPED_TEST(Ell, CanBeListConstructed) ASSERT_EQ(m->get_num_stored_elements_per_row(), 1); EXPECT_EQ(m->get_values()[0], value_type{0.0}); EXPECT_EQ(m->get_values()[1], value_type{-1.0}); - EXPECT_EQ(m->get_values()[2], value_type{1.0}); - EXPECT_EQ(m->get_values()[3], value_type{0.0}); - EXPECT_EQ(m->get_col_idxs()[0], index_type{0}); - EXPECT_EQ(m->get_col_idxs()[1], index_type{-1}); + EXPECT_EQ(m->get_values()[2], value_type{0.0}); + EXPECT_EQ(m->get_values()[3], value_type{-5.0}); + EXPECT_EQ(m->get_col_idxs()[0], index_type{-1}); + EXPECT_EQ(m->get_col_idxs()[1], index_type{0}); } @@ -459,6 +459,28 @@ TYPED_TEST(Ell, CanBeReadFromMatrixData) } +TYPED_TEST(Ell, CanBeDetectDataWithDifferentSparsity) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename 
TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; + auto vec_data = std::vector>{}; + vec_data.emplace_back( + gko::matrix_data({2, 3}, { + {0, 0, -1.0}, + {1, 1, 2.5}, + {1, 2, 0.5}, + {2, 2, -3.0}, + })); + vec_data.emplace_back(gko::matrix_data( + {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}})); + + EXPECT_THROW( + gko::batch::detail::assert_same_sparsity_in_batched_data(vec_data), + gko::NotImplemented); +} + + TYPED_TEST(Ell, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp index 0b6197b5062..5b1fa60ed36 100644 --- a/core/test/utils/batch_helpers.hpp +++ b/core/test/utils/batch_helpers.hpp @@ -95,9 +95,10 @@ std::unique_ptr generate_random_batch_matrix( .copy_to_array(); for (size_type b = 0; b < num_batch_items; b++) { - auto rand_mat = fill_random_matrix_with_sparsity_pattern< - typename MatrixType::unbatch_type, index_type>( - num_rows, num_cols, row_idxs, col_idxs, value_dist, engine, exec); + auto rand_mat = + fill_random_matrix( + num_rows, num_cols, row_idxs, col_idxs, value_dist, engine, + exec); result->create_view_for_item(b)->copy_from(rand_mat.get()); } diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index 7490a24bbe5..d5370c6ef6a 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -206,23 +206,24 @@ generate_random_device_matrix_data(gko::size_type num_rows, * @tparam MatrixType type of matrix to generate (must implement * the interface `ReadableFromMatrixData<>` and provide * matching `value_type` and `index_type` type aliases) + * @tparam IndexType the type for row and column indices + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine * * @param num_rows number of rows * @param num_cols number of columns - * @param value_dist distribution of matrix values * @param row_idxs the row indices of the matrix * @param col_idxs the column indices of the matrix + * @param value_dist distribution of matrix values * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor * - * The other (template) parameters match generate_random_matrix_data. - * * @return the unique pointer of MatrixType */ template , typename IndexType = typename MatrixType::index_type, typename ValueDistribution, typename Engine, typename... 
MatrixArgs> -std::unique_ptr fill_random_matrix_with_sparsity_pattern( +std::unique_ptr fill_random_matrix( size_type num_rows, size_type num_cols, const gko::array& row_idxs, const gko::array& col_idxs, ValueDistribution&& value_dist, diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc index 8cdb8daa273..64d71710dbb 100644 --- a/dpcpp/matrix/batch_ell_kernels.hpp.inc +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -44,9 +44,10 @@ __dpct_inline__ void simple_apply_kernel( const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; if (col_idx == invalid_index()) { break; - } else + } else { temp += mat.values[tidx + idx * mat.stride] * b.values[col_idx * b.stride]; + } } x.values[tidx * x.stride] = temp; } @@ -69,11 +70,12 @@ __dpct_inline__ void advanced_apply_kernel( const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; if (col_idx == invalid_index()) { break; - } else - temp += alpha.values[0] * mat.values[tidx + idx * mat.stride] * + } else { + temp += mat.values[tidx + idx * mat.stride] * b.values[col_idx * b.stride]; + } } x.values[tidx * x.stride] = - temp + beta.values[0] * x.values[tidx * x.stride]; + alpha.values[0] * temp + beta.values[0] * x.values[tidx * x.stride]; } } From 9f077180d3db898ded7075d89cef1a2f216a3d46 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 16 Oct 2023 16:28:44 +0200 Subject: [PATCH 398/583] vector mat data with duplication --- core/base/batch_utilities.hpp | 39 ++++++++++++++++------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index 3117b35d0f4..e6a52250565 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -349,17 +349,16 @@ std::unique_ptr initialize( auto num_rows = begin(vals) ? vals.size() : 0; auto common_size = dim<2>(num_rows, 1); auto b_size = batch_dim<2>(num_batch_items, common_size); - std::vector input_mat_data(num_batch_items, common_size); - for (size_type batch = 0; batch < num_batch_items; batch++) { - input_mat_data[batch].nonzeros.reserve(num_rows); - size_type idx = 0; - for (const auto& elem : vals) { - if (elem != zero()) { - input_mat_data[batch].nonzeros.emplace_back(idx, 0, elem); - } - ++idx; + mat_data single_mat_data(common_size); + single_mat_data.nonzeros.reserve(num_rows); + size_type idx = 0; + for (const auto& elem : vals) { + if (elem != zero()) { + single_mat_data.nonzeros.emplace_back(idx, 0, elem); } + ++idx; } + std::vector input_mat_data(num_batch_items, single_mat_data); return read( exec, input_mat_data, std::forward(create_args)...); } @@ -397,21 +396,19 @@ std::unique_ptr initialize( auto common_size = dim<2>(begin(vals) ? vals.size() : 0, begin(vals) ? 
begin(vals)->size() : 0); batch_dim<2> b_size(num_batch_items, common_size); - std::vector input_mat_data(num_batch_items, common_size); - for (size_type batch = 0; batch < num_batch_items; batch++) { - size_type ridx = 0; - for (const auto& row : vals) { - size_type cidx = 0; - for (const auto& elem : row) { - if (elem != zero()) { - input_mat_data[batch].nonzeros.emplace_back(ridx, cidx, - elem); - } - ++cidx; + mat_data single_mat_data(common_size); + size_type ridx = 0; + for (const auto& row : vals) { + size_type cidx = 0; + for (const auto& elem : row) { + if (elem != zero()) { + single_mat_data.nonzeros.emplace_back(ridx, cidx, elem); } - ++ridx; + ++cidx; } + ++ridx; } + std::vector input_mat_data(num_batch_items, single_mat_data); return read( exec, input_mat_data, std::forward(create_args)...); } From 70044e458d5f0b5cb8e40e7aa1b39b024638369a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 16 Oct 2023 21:49:43 +0200 Subject: [PATCH 399/583] Review updates Co-authored-by: Yu-Hsiang Tsai Co-authored-by: Marcel Koch --- core/base/batch_utilities.hpp | 44 +++++++++++++++++----- core/matrix/batch_dense.cpp | 8 ---- core/matrix/batch_ell.cpp | 11 ------ core/test/matrix/batch_ell.cpp | 23 ++++++++++- include/ginkgo/core/matrix/batch_dense.hpp | 13 +------ include/ginkgo/core/matrix/batch_ell.hpp | 17 ++------- 6 files changed, 62 insertions(+), 54 deletions(-) diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index e6a52250565..febfd59b636 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -54,6 +54,9 @@ namespace gko { namespace batch { +/** + * Duplicate a given input batch object. + */ template std::unique_ptr duplicate(std::shared_ptr exec, size_type num_duplications, @@ -78,6 +81,9 @@ std::unique_ptr duplicate(std::shared_ptr exec, } +/** + * Duplicate a monolithic matrix and create a batch object. + */ template std::unique_ptr create_from_item( std::shared_ptr exec, const size_type num_duplications, @@ -96,6 +102,13 @@ std::unique_ptr create_from_item( } +/** + * Create a batch object from a vector of monolithic object that share the same + * sparsity pattern. + * + * @note The sparsity of the elements in the input vector of matrices needs to + * be the same. TODO: Check for same sparsity among the different input items + */ template std::unique_ptr create_from_item( std::shared_ptr exec, @@ -115,6 +128,9 @@ std::unique_ptr create_from_item( } +/** + * Unbatch a batched object into a vector of items of its unbatch_type. 
+ */ template auto unbatch(const InputType* batch_object) { @@ -135,19 +151,20 @@ template void assert_same_sparsity_in_batched_data( const std::vector>& data) { - auto num_nnz = data[0].nonzeros.size(); - auto base_data = data[0]; + auto num_nnz = data.at(0).nonzeros.size(); + auto base_data = data.at(0); base_data.ensure_row_major_order(); - for (int b = 0; b < data.size(); ++b) { + for (int b = 1; b < data.size(); ++b) { if (data[b].nonzeros.size() != num_nnz) { GKO_NOT_IMPLEMENTED; } - auto temp_data = data[b]; + auto temp_data = data.at(b); temp_data.ensure_row_major_order(); for (int nnz = 0; nnz < num_nnz; ++nnz) { - if (temp_data.nonzeros[nnz].row != base_data.nonzeros[nnz].row || - temp_data.nonzeros[nnz].column != - base_data.nonzeros[nnz].column) { + if (temp_data.nonzeros.at(nnz).row != + base_data.nonzeros.at(nnz).row || + temp_data.nonzeros.at(nnz).column != + base_data.nonzeros.at(nnz).column) { GKO_NOT_IMPLEMENTED; } } @@ -158,6 +175,10 @@ void assert_same_sparsity_in_batched_data( } // namespace detail +/** + * Create a batch object from a vector of gko::matrix_data objects. Each item of + * the vector needs to store the same sparsity pattern. + */ template std::unique_ptr read( @@ -173,7 +194,7 @@ std::unique_ptr read( detail::assert_same_sparsity_in_batched_data(data); } auto tmp = - OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size), + OutputType::create(exec, batch_dim<2>(num_batch_items, data.at(0).size), std::forward(create_args)...); for (size_type b = 0; b < num_batch_items; ++b) { @@ -184,6 +205,9 @@ std::unique_ptr read( } +/** + * Write a vector of matrix data objects from an input batch object. + */ template std::vector> write( const OutputType* mvec) @@ -201,8 +225,8 @@ std::vector> write( /** - * Creates and initializes a batch of the specified Matrix type with a single - * column-vector. + * Creates and initializes a batch of the specified Matrix type from a series of + * single column-vectors. 
* * @tparam Matrix matrix type to initialize (It has to implement the * read function) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 8390d43fd7d..58c7fa25cea 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -96,14 +96,6 @@ Dense::create_const_view_for_item(size_type item_id) const } -template -std::unique_ptr> Dense::create_with_config_of( - ptr_param> other) -{ - return Dense::create(other->get_executor(), other->get_size()); -} - - template std::unique_ptr> Dense::create_const( std::shared_ptr exec, const batch_dim<2>& sizes, diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 5626860e7ee..88863a05dd4 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -100,17 +100,6 @@ Ell::create_const_view_for_item(size_type item_id) const } -template -std::unique_ptr> -Ell::create_with_config_of( - ptr_param> other) -{ - return Ell::create( - other->get_executor(), other->get_size(), - other->get_num_stored_elements_per_row()); -} - - template std::unique_ptr> Ell::create_const( diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp index e04ed96bf4c..2c8166aa023 100644 --- a/core/test/matrix/batch_ell.cpp +++ b/core/test/matrix/batch_ell.cpp @@ -459,7 +459,7 @@ TYPED_TEST(Ell, CanBeReadFromMatrixData) } -TYPED_TEST(Ell, CanBeDetectDataWithDifferentSparsity) +TYPED_TEST(Ell, ThrowsForDataWithDifferentNnz) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; @@ -481,6 +481,27 @@ TYPED_TEST(Ell, CanBeDetectDataWithDifferentSparsity) } +TYPED_TEST(Ell, ThrowsForDataWithDifferentSparsity) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; + auto vec_data = std::vector>{}; + vec_data.emplace_back( + gko::matrix_data({2, 3}, { + {0, 0, -1.0}, + {1, 1, 2.5}, + {2, 2, -3.0}, + })); + vec_data.emplace_back(gko::matrix_data( + {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}})); + + EXPECT_THROW( + gko::batch::detail::assert_same_sparsity_in_batched_data(vec_data), + gko::NotImplemented); +} + + TYPED_TEST(Ell, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 0b2bcc49166..5a1697afec4 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -93,15 +93,6 @@ class Dense final : public EnableBatchLinOp>, using absolute_type = remove_complex; using complex_type = to_complex; - /** - * Creates a Dense matrix with the configuration of another Dense - * matrix. - * - * @param other The other matrix whose configuration needs to copied. - */ - static std::unique_ptr create_with_config_of( - ptr_param other); - void convert_to(Dense>* result) const override; void move_to(Dense>* result) override; @@ -233,8 +224,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index a6381f90f10..a02d6c81fe8 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -98,15 +98,6 @@ class Ell final using absolute_type = remove_complex; using complex_type = to_complex; - /** - * Creates a Ell matrix with the configuration of another Ell - * matrix. - * - * @param other The other matrix whose configuration needs to copied. - */ - static std::unique_ptr create_with_config_of( - ptr_param other); - void convert_to( Ell, IndexType>* result) const override; @@ -222,8 +213,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type* get_const_col_idxs_for_item( - size_type batch_id) const noexcept + const index_type* get_const_col_idxs_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data(); @@ -251,8 +242,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + From e13b07b551e08c9831b07c993ec5b26064835d08 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Mon, 16 Oct 2023 19:52:38 +0000 Subject: [PATCH 400/583] Format files Co-authored-by: Pratik Nayak --- include/ginkgo/core/matrix/batch_dense.hpp | 4 ++-- include/ginkgo/core/matrix/batch_ell.hpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 5a1697afec4..47230c24e32 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -224,8 +224,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index a02d6c81fe8..fa00a0631fd 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -213,8 +213,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type* get_const_col_idxs_for_item(size_type batch_id) const - noexcept + const index_type* get_const_col_idxs_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data(); @@ -242,8 +242,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + From b2f7b473288a0f818f1c3237ce16c450672b006f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 17 Oct 2023 10:11:59 +0200 Subject: [PATCH 401/583] Review updates Co-authored-by: Marcel Koch --- core/base/batch_utilities.hpp | 3 +++ core/matrix/batch_ell.cpp | 13 ++----------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index febfd59b636..b4e380a4162 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -151,6 +151,9 @@ template void assert_same_sparsity_in_batched_data( const std::vector>& data) { + if (data.empty()) { + return; + } auto num_nnz = data.at(0).nonzeros.size(); auto base_data = data.at(0); base_data.ensure_row_major_order(); diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 88863a05dd4..b2987e741d9 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -147,10 +147,7 @@ const Ell* Ell::apply( ptr_param> b, ptr_param> x) const { - this->validate_application_parameters(b.get(), x.get()); - auto exec = this->get_executor(); - this->apply_impl(make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, x).get()); + this->apply(b, x); return this; } @@ -180,13 +177,7 @@ const Ell* Ell::apply( ptr_param> beta, ptr_param> x) const { - this->validate_application_parameters(alpha.get(), b.get(), beta.get(), - x.get()); - auto exec = this->get_executor(); - this->apply_impl(make_temporary_clone(exec, alpha).get(), - make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, beta).get(), - make_temporary_clone(exec, x).get()); + this->apply(alpha, b, beta, x); return this; } From e256261db28c62c65d9ad63cc6041e963da28702 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Sun, 12 Feb 2023 20:01:04 +0100 Subject: [PATCH 402/583] Add vec mean implementation Add vec mean tests --- .../matrix/dense_kernels.instantiate.cpp | 2 + .../unified/matrix/dense_kernels.template.cpp | 16 +++++++ core/device_hooks/common_kernels.inc.cpp | 1 + core/distributed/vector.cpp | 45 +++++++++++++++++-- core/matrix/dense.cpp | 33 ++++++++++++++ core/matrix/dense_kernels.hpp | 7 +++ include/ginkgo/core/distributed/vector.hpp | 26 +++++++++++ include/ginkgo/core/matrix/dense.hpp | 29 ++++++++++++ reference/matrix/dense_kernels.cpp | 22 +++++++++ reference/test/matrix/dense_kernels.cpp | 16 +++++++ test/mpi/vector.cpp | 21 +++++++++ 11 files changed, 215 insertions(+), 3 deletions(-) diff --git a/common/unified/matrix/dense_kernels.instantiate.cpp b/common/unified/matrix/dense_kernels.instantiate.cpp index bf20c8a19b6..f34d05954c4 100644 --- a/common/unified/matrix/dense_kernels.instantiate.cpp +++ b/common/unified/matrix/dense_kernels.instantiate.cpp @@ -99,6 +99,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( // split GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_MEAN_KERNEL); // end diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index b6ed5fb37e0..d7e1c08f38c 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -278,6 +278,22 @@ void 
compute_norm1(std::shared_ptr exec, } +template +void compute_mean(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense* result, array& tmp) +{ + using ValueType_nc = gko::remove_complex; + run_kernel_col_reduction_cached( + exec, + [] GKO_KERNEL(auto i, auto j, auto x, auto total_size) { + return x(i, j) / static_cast(total_size); + }, + GKO_KERNEL_REDUCE_SUM(ValueType), result->get_values(), x->get_size(), + tmp, x, x->get_size()[0]); +} + + template void compute_max_nnz_per_row(std::shared_ptr exec, const matrix::Dense* source, diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 462675c15db..7f7b1b473a2 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -347,6 +347,7 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_MEAN_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL); diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 001cf75b76d..f05a2df73fd 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -30,10 +30,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include - - #include +#include #include "core/distributed/vector_kernels.hpp" @@ -573,12 +571,52 @@ void Vector::compute_squared_norm2(ptr_param result, } +template +void Vector::compute_mean(LinOp* result) const +{ + array tmp{this->get_executor()}; + this->compute_mean(result, tmp); +} + + +void Vector::compute_mean(LinOp* result, array& tmp) const +{ + using MeanVector = local_vector_type; + const auto global_size = this->get_size()[0]; + const auto local_size = this->get_local_vector()->get_size()[0]; + const auto num_vecs = static_cast(this->get_size()[1]); + GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, num_vecs)); + auto exec = this->get_executor(); + const auto comm = this->get_communicator(); + auto dense_res = make_temporary_clone(exec, as(result)); + this->get_local_vector()->compute_mean(dense_res.get()); + + // scale by its weight ie ratio of local to global size + auto weight = initialize>>( + 1, {static_cast>(local_size) / global_size}, + this->get_executor()); + dense_res->scale(weight.get()); + + exec->synchronize(); + if (mpi::requires_host_buffer(exec, comm)) { + host_reduction_buffer_.init(exec->get_master(), dense_res->get_size()); + host_reduction_buffer_->copy_from(dense_res.get()); + comm.all_reduce(exec->get_master(), + host_reduction_buffer_->get_values(), num_vecs, + MPI_SUM); + dense_res->copy_from(host_reduction_buffer_.get()); + } else { + comm.all_reduce(exec, dense_res->get_values(), num_vecs, MPI_SUM); + } +} + template ValueType& Vector::at_local(size_type row, size_type col) noexcept { return local_.at(row, col); } + template ValueType Vector::at_local(size_type row, size_type col) const noexcept @@ -586,6 +624,7 @@ ValueType Vector::at_local(size_type row, return local_.at(row, col); } + template ValueType& Vector::at_local(size_type idx) noexcept { diff --git 
a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 17dec93c234..babb1919040 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -80,6 +80,7 @@ GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot_dispatch); GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot_dispatch); GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2_dispatch); GKO_REGISTER_OPERATION(compute_norm1, dense::compute_norm1); +GKO_REGISTER_OPERATION(compute_mean, dense::compute_mean); GKO_REGISTER_OPERATION(compute_squared_norm2, dense::compute_squared_norm2); GKO_REGISTER_OPERATION(compute_sqrt, dense::compute_sqrt); GKO_REGISTER_OPERATION(compute_max_nnz_per_row, dense::compute_max_nnz_per_row); @@ -235,6 +236,14 @@ void Dense::compute_squared_norm2(ptr_param result) const } +template +void Dense::compute_mean(LinOp* result) const +{ + auto exec = this->get_executor(); + this->compute_mean_impl(make_temporary_output_clone(exec, result).get()); +} + + template void Dense::inv_scale_impl(const LinOp* alpha) { @@ -496,6 +505,20 @@ void Dense::compute_squared_norm2(ptr_param result, } +template +void Dense::compute_mean(LinOp* result, array& tmp) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); + auto exec = this->get_executor(); + if (tmp.get_executor() != exec) { + tmp.clear(); + tmp.set_executor(exec); + } + auto dense_res = make_temporary_conversion(result); + exec->run(dense::make_compute_mean(this, dense_res.get(), tmp)); +} + + template void Dense::compute_squared_norm2_impl(LinOp* result) const { @@ -505,6 +528,16 @@ void Dense::compute_squared_norm2_impl(LinOp* result) const tmp); } +template +void Dense::compute_mean_impl(LinOp* result) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); + auto exec = this->get_executor(); + auto dense_res = make_temporary_conversion(result); + array tmp{exec}; + exec->run(dense::make_compute_mean(this, dense_res.get(), tmp)); +} + template Dense& Dense::operator=(const Dense& other) diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 9a487fadeda..a352aa8d7c1 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -146,6 +146,11 @@ namespace kernels { matrix::Dense>* result, \ array& tmp) +#define GKO_DECLARE_DENSE_COMPUTE_MEAN_KERNEL(_type) \ + void compute_mean(std::shared_ptr exec, \ + const matrix::Dense<_type>* x, \ + matrix::Dense<_type>* result, array& tmp) + #define GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL(_type, _prec) \ void fill_in_matrix_data(std::shared_ptr exec, \ const device_matrix_data<_type, _prec>& data, \ @@ -349,6 +354,8 @@ namespace kernels { GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_MEAN_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType); \ template \ diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 61ceab8e380..e86c2ec3e61 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -404,6 +404,32 @@ class Vector */ void compute_norm1(ptr_param result, array& tmp) const; + /** + * Computes the column-wise mean of this (multi-)vector using a global + * reduction. 
+ * + * @param result a Dense row matrix, used to store the mean + * (the number of columns in result must match the number + * of columns of this) + * @param tmp the temporary storage to use for partial sums during the + * reduction computation. It may be resized and/or reset to the + * correct executor. + */ + void compute_mean(LinOp* result) const; + + /** + * Computes the column-wise mean of this (multi-)vector using a global + * reduction. + * + * @param result a Dense row matrix, used to store the mean + * (the number of columns in result must match the number + * of columns of this) + * @param tmp the temporary storage to use for partial sums during the + * reduction computation. It may be resized and/or reset to the + * correct executor. + */ + void compute_mean(LinOp* result, array& tmp) const; + /** * Returns a single element of the multi-vector. * diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index ae738d49b93..1cba8622fce 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -917,6 +917,27 @@ class Dense */ void compute_squared_norm2(ptr_param result, array& tmp) const; + /** + * Computes the column-wise mean of this matrix. + * + * @param result a Dense row vector, used to store the norm + * (the number of columns in the vector must match the number + * of columns of this) + */ + void compute_mean(LinOp* result) const; + + /** + * Computes the column-wise mean of this matrix. + * + * @param result a Dense row vector, used to store the norm + * (the number of columns in the vector must match the + * number of columns of this) + * @param tmp the temporary storage to use for partial sums during the + * reduction computation. It may be resized and/or reset to the + * correct executor. + */ + void compute_mean(LinOp* result, array& tmp) const; + /** * Create a submatrix from the original matrix. * Warning: defining stride for this create_submatrix method might cause @@ -1215,6 +1236,14 @@ class Dense */ virtual void compute_squared_norm2_impl(LinOp* result) const; + /** + * @copydoc compute_mean(LinOp*) const + * + * @deprecated This function will be removed in the future, + * we will instead always use Ginkgo's implementation. + */ + virtual void compute_mean_impl(LinOp* result) const; + /** * Resizes the matrix to the given size. 
* diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index ba399b0f445..df86aedd047 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -397,6 +397,28 @@ void compute_norm1(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); +template +void compute_mean(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense* result, array&) +{ + using ValueType_nc = gko::remove_complex; + for (size_type j = 0; j < x->get_size()[1]; ++j) { + result->at(0, j) = zero(); + } + + for (size_type i = 0; i < x->get_size()[0]; ++i) { + const ValueType_nc alpha = static_cast(i) / (i + 1); + const ValueType_nc beta = static_cast(1) / (i + 1); + for (size_type j = 0; j < x->get_size()[1]; ++j) { + result->at(0, j) = alpha * result->at(0, j) + beta * x->at(i, j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_MEAN_KERNEL); + + template void fill_in_matrix_data(std::shared_ptr exec, const device_matrix_data& data, diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 60713c815de..763cf1b6321 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -698,6 +698,22 @@ TYPED_TEST(Dense, ComputesNorm1Mixed) } +TYPED_TEST(Dense, ComputesMean) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using MeanVector = gko::matrix::Dense; + auto mtx(gko::initialize( + {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}, I{-1.0, -1.0}}, + this->exec)); + auto result = MeanVector::create(this->exec, gko::dim<2>{1, 2}); + + mtx->compute_mean(result.get()); + + GKO_ASSERT_MTX_NEAR(result, l({{1.0, 1.5}}), 1e-2); +} + + TYPED_TEST(Dense, ComputeDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; diff --git a/test/mpi/vector.cpp b/test/mpi/vector.cpp index a7ad735458c..414f8197f57 100644 --- a/test/mpi/vector.cpp +++ b/test/mpi/vector.cpp @@ -675,6 +675,27 @@ TYPED_TEST(VectorReductions, ComputeSquaredNorm2WithTmpIsSameAsDense) r::value); } +TYPED_TEST(VectorReductions, ComputesMeanIsSameAsDense) +{ + using value_type = typename TestFixture::value_type; + this->init_result(); + + this->x->compute_mean(this->res.get()); + this->dense_x->compute_mean(this->dense_res.get()); + + GKO_ASSERT_MTX_NEAR(this->res, this->dense_res, r::value); +} + +TYPED_TEST(VectorReductions, ComputesMeanWithTmpIsSameAsDense) +{ + using value_type = typename TestFixture::value_type; + this->init_result(); + + this->x->compute_mean(this->res.get(), this->tmp); + this->dense_x->compute_mean(this->dense_res.get(), this->dense_tmp); + + GKO_ASSERT_MTX_NEAR(this->res, this->dense_res, r::value); +} TYPED_TEST(VectorReductions, ComputeDotCopiesToHostOnlyIfNecessary) { From 01205e54d6e35b65f1e2eb1226d841cd32673bce Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Fri, 13 Oct 2023 20:44:03 +0200 Subject: [PATCH 403/583] use ptr_param --- core/distributed/vector.cpp | 5 +++-- include/ginkgo/core/distributed/vector.hpp | 12 +++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index f05a2df73fd..23a6774ccd2 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -572,14 +572,15 @@ void Vector::compute_squared_norm2(ptr_param result, template -void Vector::compute_mean(LinOp* result) const +void Vector::compute_mean(ptr_param result) const { array tmp{this->get_executor()}; 
this->compute_mean(result, tmp); } -void Vector::compute_mean(LinOp* result, array& tmp) const +void Vector::compute_mean(ptr_param result, + array& tmp) const { using MeanVector = local_vector_type; const auto global_size = this->get_size()[0]; diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index e86c2ec3e61..86a82a2f7da 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -404,6 +404,16 @@ class Vector */ void compute_norm1(ptr_param result, array& tmp) const; + /** + * Computes the column-wise mean of this (multi-)vector using a global + * reduction. + * + * @param result a Dense row matrix, used to store the mean + * (the number of columns in result must match the number + * of columns of this) + */ + void compute_mean(ptr_param result) const; + /** * Computes the column-wise mean of this (multi-)vector using a global * reduction. @@ -415,7 +425,7 @@ class Vector * reduction computation. It may be resized and/or reset to the * correct executor. */ - void compute_mean(LinOp* result) const; + void compute_mean(ptr_param result, array& tmp) const; /** * Computes the column-wise mean of this (multi-)vector using a global From e7feed393c3b265eebf2332d3b9d485fea4fd4f7 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Sat, 14 Oct 2023 08:16:02 +0200 Subject: [PATCH 404/583] fix documentation --- include/ginkgo/core/matrix/dense.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 1cba8622fce..fb9773e1247 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -920,7 +920,7 @@ class Dense /** * Computes the column-wise mean of this matrix. * - * @param result a Dense row vector, used to store the norm + * @param result a Dense row vector, used to store the mean * (the number of columns in the vector must match the number * of columns of this) */ @@ -929,7 +929,7 @@ class Dense /** * Computes the column-wise mean of this matrix. * - * @param result a Dense row vector, used to store the norm + * @param result a Dense row vector, used to store the mean * (the number of columns in the vector must match the * number of columns of this) * @param tmp the temporary storage to use for partial sums during the From 61b7c803adf3757b341df446be7ca64e24b359c3 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Sat, 14 Oct 2023 14:56:03 +0200 Subject: [PATCH 405/583] update documentation and tests --- include/ginkgo/core/matrix/dense.hpp | 4 ++-- reference/test/matrix/dense_kernels.cpp | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index fb9773e1247..912e857611c 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -918,7 +918,7 @@ class Dense void compute_squared_norm2(ptr_param result, array& tmp) const; /** - * Computes the column-wise mean of this matrix. + * Computes the column-wise arithmetic mean of this matrix. * * @param result a Dense row vector, used to store the mean * (the number of columns in the vector must match the number @@ -927,7 +927,7 @@ class Dense void compute_mean(LinOp* result) const; /** - * Computes the column-wise mean of this matrix. + * Computes the column-wise arithmetic mean of this matrix. 
* * @param result a Dense row vector, used to store the mean * (the number of columns in the vector must match the diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 763cf1b6321..9e31410fc49 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -702,6 +702,7 @@ TYPED_TEST(Dense, ComputesMean) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; + using T_nc = gko::remove_complex; using MeanVector = gko::matrix::Dense; auto mtx(gko::initialize( {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}, I{-1.0, -1.0}}, @@ -710,7 +711,8 @@ TYPED_TEST(Dense, ComputesMean) mtx->compute_mean(result.get()); - GKO_ASSERT_MTX_NEAR(result, l({{1.0, 1.5}}), 1e-2); + EXPECT_EQ(result->at(0, 0), T_nc{1.0}); + EXPECT_EQ(result->at(0, 1), T_nc{1.5}); } From 52ef32d5ce5f00d2a769513c3be4d3640900dfb3 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Sat, 14 Oct 2023 15:24:05 +0200 Subject: [PATCH 406/583] use GKO_EXPECT_NEAR --- core/matrix/dense.cpp | 20 +++++++++++--------- include/ginkgo/core/matrix/dense.hpp | 4 ++-- reference/test/matrix/dense_kernels.cpp | 14 +++++--------- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index babb1919040..a50ab6b260b 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -236,14 +236,6 @@ void Dense::compute_squared_norm2(ptr_param result) const } -template -void Dense::compute_mean(LinOp* result) const -{ - auto exec = this->get_executor(); - this->compute_mean_impl(make_temporary_output_clone(exec, result).get()); -} - - template void Dense::inv_scale_impl(const LinOp* alpha) { @@ -506,7 +498,16 @@ void Dense::compute_squared_norm2(ptr_param result, template -void Dense::compute_mean(LinOp* result, array& tmp) const +void Dense::compute_mean(ptr_param result) const +{ + auto exec = this->get_executor(); + this->compute_mean_impl(make_temporary_output_clone(exec, result).get()); +} + + +template +void Dense::compute_mean(ptr_param result, + array& tmp) const { GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); @@ -528,6 +529,7 @@ void Dense::compute_squared_norm2_impl(LinOp* result) const tmp); } + template void Dense::compute_mean_impl(LinOp* result) const { diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 912e857611c..9edf55d2e4c 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -924,7 +924,7 @@ class Dense * (the number of columns in the vector must match the number * of columns of this) */ - void compute_mean(LinOp* result) const; + void compute_mean(ptr_param result) const; /** * Computes the column-wise arithmetic mean of this matrix. @@ -936,7 +936,7 @@ class Dense * reduction computation. It may be resized and/or reset to the * correct executor. */ - void compute_mean(LinOp* result, array& tmp) const; + void compute_mean(ptr_param result, array& tmp) const; /** * Create a submatrix from the original matrix. 
diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 9e31410fc49..a2527a31d3e 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -702,17 +702,13 @@ TYPED_TEST(Dense, ComputesMean) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; - using T_nc = gko::remove_complex; - using MeanVector = gko::matrix::Dense; - auto mtx(gko::initialize( - {I{1.0, 0.0}, I{2.0, 3.0}, I{2.0, 4.0}, I{-1.0, -1.0}}, - this->exec)); - auto result = MeanVector::create(this->exec, gko::dim<2>{1, 2}); + auto result = Mtx::create(this->exec, gko::dim<2>{1, 3}); - mtx->compute_mean(result.get()); + this->mtx4->compute_mean(result.get()); - EXPECT_EQ(result->at(0, 0), T_nc{1.0}); - EXPECT_EQ(result->at(0, 1), T_nc{1.5}); + GKO_EXPECT_NEAR(result->at(0, 0), T{0.5}, 1e-6); + GKO_EXPECT_NEAR(result->at(0, 1), T{4.0}, 1e-6); + GKO_EXPECT_NEAR(result->at(0, 2), T{1.0}, 1e-6); } From f5e3c9eb2a84b7da0c4384554b7acbe9c38adb2e Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Sat, 14 Oct 2023 17:28:06 +0200 Subject: [PATCH 407/583] fix documentation --- include/ginkgo/core/distributed/vector.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 86a82a2f7da..1e3b9571b19 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -415,8 +415,8 @@ class Vector void compute_mean(ptr_param result) const; /** - * Computes the column-wise mean of this (multi-)vector using a global - * reduction. + * Computes the column-wise arithmetic mean of this (multi-)vector using a + * global reduction. * * @param result a Dense row matrix, used to store the mean * (the number of columns in result must match the number @@ -428,8 +428,8 @@ class Vector void compute_mean(ptr_param result, array& tmp) const; /** - * Computes the column-wise mean of this (multi-)vector using a global - * reduction. + * Computes the column-wise arithmetic mean of this (multi-)vector using a + * global reduction. 
* * @param result a Dense row matrix, used to store the mean * (the number of columns in result must match the number From fd9ca82fd79db77d1a03d646aca1f233935f2a7d Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Sun, 15 Oct 2023 08:10:54 +0200 Subject: [PATCH 408/583] Fixup missing template declaration, add more tests --- core/distributed/vector.cpp | 1 + reference/test/matrix/dense_kernels.cpp | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 23a6774ccd2..387e792c147 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -579,6 +579,7 @@ void Vector::compute_mean(ptr_param result) const } +template void Vector::compute_mean(ptr_param result, array& tmp) const { diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index a2527a31d3e..3a382371635 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -712,6 +712,16 @@ TYPED_TEST(Dense, ComputesMean) } +TYPED_TEST(Dense, ComputesMeanFailsOnWrongResultSize) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto result = Mtx::create(this->exec, gko::dim<2>{1, 2}); + + ASSERT_THROW(this->mtx4->compute_mean(result), gko::DimensionMismatch); +} + + TYPED_TEST(Dense, ComputeDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; From 569e1f67137e13828f7caf1a1d52b02ddf3f0124 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Sun, 15 Oct 2023 12:31:48 +0200 Subject: [PATCH 409/583] Fixup call with ptr_param --- test/mpi/vector.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/mpi/vector.cpp b/test/mpi/vector.cpp index 414f8197f57..43d18aad6c5 100644 --- a/test/mpi/vector.cpp +++ b/test/mpi/vector.cpp @@ -680,8 +680,8 @@ TYPED_TEST(VectorReductions, ComputesMeanIsSameAsDense) using value_type = typename TestFixture::value_type; this->init_result(); - this->x->compute_mean(this->res.get()); - this->dense_x->compute_mean(this->dense_res.get()); + this->x->compute_mean(this->res); + this->dense_x->compute_mean(this->dense_res); GKO_ASSERT_MTX_NEAR(this->res, this->dense_res, r::value); } @@ -691,8 +691,8 @@ TYPED_TEST(VectorReductions, ComputesMeanWithTmpIsSameAsDense) using value_type = typename TestFixture::value_type; this->init_result(); - this->x->compute_mean(this->res.get(), this->tmp); - this->dense_x->compute_mean(this->dense_res.get(), this->dense_tmp); + this->x->compute_mean(this->res, this->tmp); + this->dense_x->compute_mean(this->dense_res, this->dense_tmp); GKO_ASSERT_MTX_NEAR(this->res, this->dense_res, r::value); } From bed88788718a75c6a7cdc81abb99e2c987a604f9 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Mon, 16 Oct 2023 15:04:10 +0200 Subject: [PATCH 410/583] Add review suggestions Co-authored-by: Marcel Koch --- common/unified/matrix/dense_kernels.template.cpp | 6 +++--- core/distributed/vector.cpp | 4 ++-- core/matrix/dense.cpp | 6 ++---- include/ginkgo/core/distributed/vector.hpp | 13 ------------- reference/test/matrix/dense_kernels.cpp | 6 +++--- 5 files changed, 10 insertions(+), 25 deletions(-) diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index d7e1c08f38c..e8751a896a0 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -286,11 +286,11 @@ void compute_mean(std::shared_ptr exec, using ValueType_nc = 
gko::remove_complex; run_kernel_col_reduction_cached( exec, - [] GKO_KERNEL(auto i, auto j, auto x, auto total_size) { - return x(i, j) / static_cast(total_size); + [] GKO_KERNEL(auto i, auto j, auto x, auto inv_total_size) { + return x(i, j) * inv_total_size; }, GKO_KERNEL_REDUCE_SUM(ValueType), result->get_values(), x->get_size(), - tmp, x, x->get_size()[0]); + tmp, x, 1. / x->get_size()[0]); } diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index 387e792c147..b828a44bd6d 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -587,7 +587,7 @@ void Vector::compute_mean(ptr_param result, const auto global_size = this->get_size()[0]; const auto local_size = this->get_local_vector()->get_size()[0]; const auto num_vecs = static_cast(this->get_size()[1]); - GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, num_vecs)); + GKO_ASSERT_EQUAL_COLS(result, dim<2>(1, num_vecs)); auto exec = this->get_executor(); const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as(result)); @@ -595,7 +595,7 @@ void Vector::compute_mean(ptr_param result, // scale by its weight ie ratio of local to global size auto weight = initialize>>( - 1, {static_cast>(local_size) / global_size}, + {static_cast>(local_size) / global_size}, this->get_executor()); dense_res->scale(weight.get()); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index a50ab6b260b..68a26c5bd87 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -509,7 +509,7 @@ template void Dense::compute_mean(ptr_param result, array& tmp) const { - GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); + GKO_ASSERT_EQUAL_COLS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); if (tmp.get_executor() != exec) { tmp.clear(); @@ -533,11 +533,9 @@ void Dense::compute_squared_norm2_impl(LinOp* result) const template void Dense::compute_mean_impl(LinOp* result) const { - GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); - auto dense_res = make_temporary_conversion(result); array tmp{exec}; - exec->run(dense::make_compute_mean(this, dense_res.get(), tmp)); + this->compute_mean(make_temporary_output_clone(exec, result).get(), tmp); } diff --git a/include/ginkgo/core/distributed/vector.hpp b/include/ginkgo/core/distributed/vector.hpp index 1e3b9571b19..87afa3a01b5 100644 --- a/include/ginkgo/core/distributed/vector.hpp +++ b/include/ginkgo/core/distributed/vector.hpp @@ -427,19 +427,6 @@ class Vector */ void compute_mean(ptr_param result, array& tmp) const; - /** - * Computes the column-wise arithmetic mean of this (multi-)vector using a - * global reduction. - * - * @param result a Dense row matrix, used to store the mean - * (the number of columns in result must match the number - * of columns of this) - * @param tmp the temporary storage to use for partial sums during the - * reduction computation. It may be resized and/or reset to the - * correct executor. - */ - void compute_mean(LinOp* result, array& tmp) const; - /** * Returns a single element of the multi-vector. 
* diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 3a382371635..532bd14ec95 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -706,9 +706,9 @@ TYPED_TEST(Dense, ComputesMean) this->mtx4->compute_mean(result.get()); - GKO_EXPECT_NEAR(result->at(0, 0), T{0.5}, 1e-6); - GKO_EXPECT_NEAR(result->at(0, 1), T{4.0}, 1e-6); - GKO_EXPECT_NEAR(result->at(0, 2), T{1.0}, 1e-6); + GKO_EXPECT_NEAR(result->at(0, 0), T{0.5}, r::value * 10); + GKO_EXPECT_NEAR(result->at(0, 1), T{4.0}, r::value * 10); + GKO_EXPECT_NEAR(result->at(0, 2), T{1.0}, r::value * 10); } From a192d9466f27d580d6c67c6f1e0f7def45ef2020 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Tue, 17 Oct 2023 06:37:09 +0200 Subject: [PATCH 411/583] Update test/mpi/vector.cpp Co-authored-by: Yu-Hsiang M. Tsai <19565938+yhmtsai@users.noreply.github.com> --- test/mpi/vector.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/mpi/vector.cpp b/test/mpi/vector.cpp index 43d18aad6c5..515c8e59a7b 100644 --- a/test/mpi/vector.cpp +++ b/test/mpi/vector.cpp @@ -675,6 +675,7 @@ TYPED_TEST(VectorReductions, ComputeSquaredNorm2WithTmpIsSameAsDense) r::value); } + TYPED_TEST(VectorReductions, ComputesMeanIsSameAsDense) { using value_type = typename TestFixture::value_type; From 927d9e7ebe85117bfe9c93a0a5069501a2ac9fcc Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Tue, 17 Oct 2023 06:37:24 +0200 Subject: [PATCH 412/583] Update test/mpi/vector.cpp Co-authored-by: Yu-Hsiang M. Tsai <19565938+yhmtsai@users.noreply.github.com> --- common/unified/matrix/dense_kernels.template.cpp | 2 +- test/mpi/vector.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index e8751a896a0..9bd5c04f861 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -290,7 +290,7 @@ void compute_mean(std::shared_ptr exec, return x(i, j) * inv_total_size; }, GKO_KERNEL_REDUCE_SUM(ValueType), result->get_values(), x->get_size(), - tmp, x, 1. 
/ x->get_size()[0]); + tmp, x, ValueType_nc{1.} / x->get_size()[0]); } diff --git a/test/mpi/vector.cpp b/test/mpi/vector.cpp index 515c8e59a7b..2b00de19bda 100644 --- a/test/mpi/vector.cpp +++ b/test/mpi/vector.cpp @@ -687,6 +687,7 @@ TYPED_TEST(VectorReductions, ComputesMeanIsSameAsDense) GKO_ASSERT_MTX_NEAR(this->res, this->dense_res, r::value); } + TYPED_TEST(VectorReductions, ComputesMeanWithTmpIsSameAsDense) { using value_type = typename TestFixture::value_type; From 393ea41733b8602da2fc2570ab5ad60d6a19a37a Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Tue, 17 Oct 2023 06:39:03 +0200 Subject: [PATCH 413/583] Add review suggestions Co-authored-by: Marcel Koch --- core/distributed/vector.cpp | 2 +- core/matrix/dense.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/distributed/vector.cpp b/core/distributed/vector.cpp index b828a44bd6d..b61d5c36328 100644 --- a/core/distributed/vector.cpp +++ b/core/distributed/vector.cpp @@ -587,7 +587,7 @@ void Vector::compute_mean(ptr_param result, const auto global_size = this->get_size()[0]; const auto local_size = this->get_local_vector()->get_size()[0]; const auto num_vecs = static_cast(this->get_size()[1]); - GKO_ASSERT_EQUAL_COLS(result, dim<2>(1, num_vecs)); + GKO_ASSERT_EQUAL_COLS(result, this); auto exec = this->get_executor(); const auto comm = this->get_communicator(); auto dense_res = make_temporary_clone(exec, as(result)); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 68a26c5bd87..9f7dff96aab 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -509,7 +509,7 @@ template void Dense::compute_mean(ptr_param result, array& tmp) const { - GKO_ASSERT_EQUAL_COLS(result, dim<2>(1, this->get_size()[1])); + GKO_ASSERT_EQUAL_COLS(result, this); auto exec = this->get_executor(); if (tmp.get_executor() != exec) { tmp.clear(); From b3dbc679b66382b236e508153b5f058aff08f2b8 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Tue, 17 Oct 2023 08:53:50 +0200 Subject: [PATCH 414/583] Doc fixes and format --- include/ginkgo/core/matrix/dense.hpp | 3 --- test/mpi/vector.cpp | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 9edf55d2e4c..0db8f7697a5 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -1238,9 +1238,6 @@ class Dense /** * @copydoc compute_mean(LinOp*) const - * - * @deprecated This function will be removed in the future, - * we will instead always use Ginkgo's implementation. 
*/ virtual void compute_mean_impl(LinOp* result) const; diff --git a/test/mpi/vector.cpp b/test/mpi/vector.cpp index 2b00de19bda..ac75a461465 100644 --- a/test/mpi/vector.cpp +++ b/test/mpi/vector.cpp @@ -699,6 +699,7 @@ TYPED_TEST(VectorReductions, ComputesMeanWithTmpIsSameAsDense) GKO_ASSERT_MTX_NEAR(this->res, this->dense_res, r::value); } + TYPED_TEST(VectorReductions, ComputeDotCopiesToHostOnlyIfNecessary) { this->init_result(); From 452af109676e3b00cf77469308065b5a3c22abf8 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Tue, 17 Oct 2023 08:54:30 +0200 Subject: [PATCH 415/583] Use simpler implementation for reference --- reference/matrix/dense_kernels.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index df86aedd047..ff69dcf2684 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -408,11 +408,10 @@ void compute_mean(std::shared_ptr exec, } for (size_type i = 0; i < x->get_size()[0]; ++i) { - const ValueType_nc alpha = static_cast(i) / (i + 1); - const ValueType_nc beta = static_cast(1) / (i + 1); for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) = alpha * result->at(0, j) + beta * x->at(i, j); + result->at(0, i) += x->at(i, j); } + result->at(0, i) /= static_cast(x->get_size()[1]); } } From c364dd81f90479e3459abb206f6754fe2a3f9cf1 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 18 Oct 2023 10:09:42 +0200 Subject: [PATCH 416/583] Fix reference compute mean impl, add test --- reference/matrix/dense_kernels.cpp | 8 ++++---- reference/test/matrix/dense_kernels.cpp | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index ff69dcf2684..47df46b3c86 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -407,11 +407,11 @@ void compute_mean(std::shared_ptr exec, result->at(0, j) = zero(); } - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, i) += x->at(i, j); + for (size_type i = 0; i < x->get_size()[1]; ++i) { + for (size_type j = 0; j < x->get_size()[0]; ++j) { + result->at(0, i) += x->at(j, i); } - result->at(0, i) /= static_cast(x->get_size()[1]); + result->at(0, i) /= static_cast(x->get_size()[0]); } } diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 532bd14ec95..b776f426794 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include @@ -702,6 +703,13 @@ TYPED_TEST(Dense, ComputesMean) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; + + auto iota = Mtx::create(this->exec, gko::dim<2>{10, 1}); + std::iota(iota->get_values(), iota->get_values() + 10, 1); + auto iota_result = Mtx::create(this->exec, gko::dim<2>{1, 1}); + iota->compute_mean(iota_result.get()); + GKO_EXPECT_NEAR(iota_result->at(0, 0), T{5.5}, r::value * 10); + auto result = Mtx::create(this->exec, gko::dim<2>{1, 3}); this->mtx4->compute_mean(result.get()); From 3e1c8316e544de399960535d35751b2987339406 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. 
Tsai" Date: Thu, 19 Oct 2023 11:11:38 +0200 Subject: [PATCH 417/583] add the const apply check --- reference/test/matrix/batch_ell_kernels.cpp | 41 +++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/reference/test/matrix/batch_ell_kernels.cpp b/reference/test/matrix/batch_ell_kernels.cpp index 81f189c3e02..d0e70bf5552 100644 --- a/reference/test/matrix/batch_ell_kernels.cpp +++ b/reference/test/matrix/batch_ell_kernels.cpp @@ -128,6 +128,21 @@ TYPED_TEST(Ell, AppliesToBatchMultiVector) } +TYPED_TEST(Ell, ConstAppliesToBatchMultiVector) +{ + using T = typename TestFixture::value_type; + using BMtx = typename TestFixture::BMtx; + + static_cast(this->mtx_0.get())->apply(this->b_0, this->x_0); + + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); + this->mtx_01->apply(this->b_01.get(), this->x_01.get()); + auto res = gko::batch::unbatch>(this->x_0.get()); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r::value); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r::value); +} + + TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) { using BMtx = typename TestFixture::BMtx; @@ -154,6 +169,32 @@ TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) } +TYPED_TEST(Ell, ConstAppliesLinearCombinationToBatchMultiVector) +{ + using BMtx = typename TestFixture::BMtx; + using BMVec = typename TestFixture::BMVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({-1.0}, this->exec); + auto beta0 = gko::initialize({2.5}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + static_cast(this->mtx_0.get()) + ->apply(alpha.get(), this->b_0.get(), beta.get(), this->x_0.get()); + + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + auto res = gko::batch::unbatch>(this->x_0.get()); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r::value); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r::value); +} + + TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultCols) { using BMVec = typename TestFixture::BMVec; From 4ef1159e00fcccdf954eac6268db1460291f7f12 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. 
Tsai" Date: Thu, 19 Oct 2023 11:16:28 +0200 Subject: [PATCH 418/583] fix batch ell infinite loop --- core/matrix/batch_ell.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index b2987e741d9..19b2dcae5c3 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -134,10 +134,7 @@ Ell* Ell::apply( ptr_param> b, ptr_param> x) { - this->validate_application_parameters(b.get(), x.get()); - auto exec = this->get_executor(); - this->apply_impl(make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, x).get()); + static_cast(this)->apply(b, x); return this; } @@ -147,7 +144,10 @@ const Ell* Ell::apply( ptr_param> b, ptr_param> x) const { - this->apply(b, x); + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); return this; } @@ -159,13 +159,7 @@ Ell* Ell::apply( ptr_param> beta, ptr_param> x) { - this->validate_application_parameters(alpha.get(), b.get(), beta.get(), - x.get()); - auto exec = this->get_executor(); - this->apply_impl(make_temporary_clone(exec, alpha).get(), - make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, beta).get(), - make_temporary_clone(exec, x).get()); + static_cast(this)->apply(alpha, b, beta, x); return this; } @@ -177,7 +171,13 @@ const Ell* Ell::apply( ptr_param> beta, ptr_param> x) const { - this->apply(alpha, b, beta, x); + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); return this; } From 53a006a8bde8d49407a35e689d2e393947d694c4 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Fri, 13 Oct 2023 22:09:37 +0200 Subject: [PATCH 419/583] Add pregenerated local solver as factory param --- core/distributed/preconditioner/schwarz.cpp | 13 +++++++++++-- .../core/distributed/preconditioner/schwarz.hpp | 5 +++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/core/distributed/preconditioner/schwarz.cpp b/core/distributed/preconditioner/schwarz.cpp index 0d1267bc0b4..2b2c33d23e7 100644 --- a/core/distributed/preconditioner/schwarz.cpp +++ b/core/distributed/preconditioner/schwarz.cpp @@ -102,14 +102,23 @@ template void Schwarz::generate( std::shared_ptr system_matrix) { - if (parameters_.local_solver) { + if (parameters_.local_solver && !parameters_.generated_local_solvers) { this->local_solver_ = parameters_.local_solver->generate( as>( system_matrix) ->get_local_matrix()); + } else if (parameters_.generated_local_solvers && + !parameters_.local_solver) { + this->local_solver_ = parameters_.generated_local_solvers; + } else if (!parameters_.generated_local_ && !parameters_.local_solver) { + throw ::gko::InvalidStateError( + __FILE__, __LINE__, __func__, + "Requires either a generated solver or an solver factory"); } else { - GKO_NOT_IMPLEMENTED; + throw ::gko::InvalidStateError( + __FILE__, __LINE__, __func__, + "Provided both a generated solver and a solver factory"); } } diff --git a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp index f31bd96aa2e..5bce97fb414 100644 --- a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp +++ 
b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp @@ -95,6 +95,11 @@ class Schwarz * Local solver factory. */ GKO_DEFERRED_FACTORY_PARAMETER(local_solver, LinOpFactory); + /** + * Generated Inner solvers. + */ + std::shared_ptr GKO_FACTORY_PARAMETER( + generated_local_solver, nullptr); }; GKO_ENABLE_LIN_OP_FACTORY(Schwarz, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); From dc36bf897fd269d1e64be85e126e52f1a7b395d8 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Fri, 13 Oct 2023 23:12:57 +0200 Subject: [PATCH 420/583] Add unit test --- test/mpi/preconditioner/schwarz.cpp | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/mpi/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp index 8d07ba44046..f3269b1d237 100644 --- a/test/mpi/preconditioner/schwarz.cpp +++ b/test/mpi/preconditioner/schwarz.cpp @@ -217,6 +217,36 @@ TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolver) this->non_dist_x); } +TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolverWithPregenSolver) +{ + using value_type = typename TestFixture::value_type; + using csr = typename TestFixture::local_matrix_type; + using cg = typename TestFixture::solver_type; + using prec = typename TestFixture::dist_prec_type; + constexpr double tolerance = 1e-20; + auto iter_stop = gko::share( + gko::stop::Iteration::build().with_max_iters(200u).on(this->exec)); + auto tol_stop = gko::share( + gko::stop::ResidualNorm::build() + .with_reduction_factor( + static_cast>(tolerance)) + .on(this->exec)); + this->non_dist_solver_factory = + cg::build() + .with_preconditioner(this->local_solver_factory) + .with_criteria(iter_stop, tol_stop) + .on(this->exec); + auto local_solver = + this->non_dist_solver_factory->generate(this->non_dist_mat); + this->dist_solver_factory = + cg::build() + .with_preconditioner(prec::build() + .with_generated_local_solver(local_solver) + .on(this->exec)) + .with_criteria(iter_stop, tol_stop) + .on(this->exec); +} + TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditioner) { From 79aaaa168b88b19f90087e2b533520aa9bc22416 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Mon, 16 Oct 2023 11:02:48 +0200 Subject: [PATCH 421/583] Test if generate fails for invalid solver states --- core/distributed/preconditioner/schwarz.cpp | 9 +-- test/mpi/preconditioner/schwarz.cpp | 69 +++++++++++++-------- 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/core/distributed/preconditioner/schwarz.cpp b/core/distributed/preconditioner/schwarz.cpp index 2b2c33d23e7..90adf384cce 100644 --- a/core/distributed/preconditioner/schwarz.cpp +++ b/core/distributed/preconditioner/schwarz.cpp @@ -102,16 +102,17 @@ template void Schwarz::generate( std::shared_ptr system_matrix) { - if (parameters_.local_solver && !parameters_.generated_local_solvers) { + if (parameters_.local_solver && !parameters_.generated_local_solver) { this->local_solver_ = parameters_.local_solver->generate( as>( system_matrix) ->get_local_matrix()); - } else if (parameters_.generated_local_solvers && + } else if (parameters_.generated_local_solver && + !parameters_.local_solver) { + this->local_solver_ = parameters_.generated_local_solver; + } else if (!parameters_.generated_local_solver && !parameters_.local_solver) { - this->local_solver_ = parameters_.generated_local_solvers; - } else if (!parameters_.generated_local_ && !parameters_.local_solver) { throw ::gko::InvalidStateError( __FILE__, __LINE__, __func__, "Requires either a generated solver or an solver factory"); diff 
--git a/test/mpi/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp index f3269b1d237..7a1f69a59a3 100644 --- a/test/mpi/preconditioner/schwarz.cpp +++ b/test/mpi/preconditioner/schwarz.cpp @@ -217,37 +217,56 @@ TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolver) this->non_dist_x); } -TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolverWithPregenSolver) + +TYPED_TEST(SchwarzPreconditioner, GenerateFailsIfPregenSolverAndSolverFactoryArePresent) { - using value_type = typename TestFixture::value_type; - using csr = typename TestFixture::local_matrix_type; - using cg = typename TestFixture::solver_type; using prec = typename TestFixture::dist_prec_type; - constexpr double tolerance = 1e-20; - auto iter_stop = gko::share( - gko::stop::Iteration::build().with_max_iters(200u).on(this->exec)); - auto tol_stop = gko::share( - gko::stop::ResidualNorm::build() - .with_reduction_factor( - static_cast>(tolerance)) - .on(this->exec)); - this->non_dist_solver_factory = - cg::build() - .with_preconditioner(this->local_solver_factory) - .with_criteria(iter_stop, tol_stop) - .on(this->exec); auto local_solver = - this->non_dist_solver_factory->generate(this->non_dist_mat); - this->dist_solver_factory = - cg::build() - .with_preconditioner(prec::build() - .with_generated_local_solver(local_solver) - .on(this->exec)) - .with_criteria(iter_stop, tol_stop) - .on(this->exec); + gko::share(this->non_dist_solver_factory->generate(this->non_dist_mat)); + + auto schwarz = prec::build() + .with_local_solver(this->local_solver_factory) + .with_generated_local_solver(local_solver) + .on(this->exec); + + ASSERT_THROW(schwarz->generate(this->dist_mat), gko::InvalidStateError); + + auto schwarz_no_solver = prec::build().on(this->exec); + ASSERT_THROW(schwarz_no_solver->generate(this->dist_mat), gko::InvalidStateError); } +// TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolverWithPregenSolver) +// { +// using value_type = typename TestFixture::value_type; +// using csr = typename TestFixture::local_matrix_type; +// using cg = typename TestFixture::solver_type; +// using prec = typename TestFixture::dist_prec_type; +// constexpr double tolerance = 1e-20; +// auto iter_stop = gko::share( +// gko::stop::Iteration::build().with_max_iters(200u).on(this->exec)); +// auto tol_stop = gko::share( +// gko::stop::ResidualNorm::build() +// .with_reduction_factor( +// static_cast>(tolerance)) +// .on(this->exec)); +// this->non_dist_solver_factory = +// cg::build() +// .with_preconditioner(this->local_solver_factory) +// .with_criteria(iter_stop, tol_stop) +// .on(this->exec); +// auto local_solver = +// this->non_dist_solver_factory->generate(this->non_dist_mat); +// this->dist_solver_factory = +// cg::build() +// .with_preconditioner(prec::build() +// .with_generated_local_solver(local_solver.get()) +// .on(this->exec)) +// .with_criteria(iter_stop, tol_stop) +// .on(this->exec); +// } + + TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditioner) { using value_type = typename TestFixture::value_type; From 1ff74f58700c364feae3544c83f338d81345d09c Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Tue, 17 Oct 2023 14:58:08 +0200 Subject: [PATCH 422/583] refactor build method a bit, add unit tests --- core/distributed/preconditioner/schwarz.cpp | 28 +++-- test/mpi/preconditioner/schwarz.cpp | 121 +++++++++++--------- 2 files changed, 81 insertions(+), 68 deletions(-) diff --git a/core/distributed/preconditioner/schwarz.cpp b/core/distributed/preconditioner/schwarz.cpp index 
90adf384cce..7dfdfd3b4a7 100644 --- a/core/distributed/preconditioner/schwarz.cpp +++ b/core/distributed/preconditioner/schwarz.cpp @@ -102,24 +102,28 @@ template void Schwarz::generate( std::shared_ptr system_matrix) { - if (parameters_.local_solver && !parameters_.generated_local_solver) { + if (parameters_.local_solver != nullptr && + parameters_.generated_local_solver != nullptr) { + throw ::gko::InvalidStateError( + __FILE__, __LINE__, __func__, + "Provided both a generated solver and a solver factory"); + } + + if (parameters_.local_solver == nullptr && + parameters_.generated_local_solver == nullptr) { + throw ::gko::InvalidStateError( + __FILE__, __LINE__, __func__, + "Requires either a generated solver or an solver factory"); + } + + if (parameters_.local_solver) { this->local_solver_ = parameters_.local_solver->generate( as>( system_matrix) ->get_local_matrix()); - } else if (parameters_.generated_local_solver && - !parameters_.local_solver) { - this->local_solver_ = parameters_.generated_local_solver; - } else if (!parameters_.generated_local_solver && - !parameters_.local_solver) { - throw ::gko::InvalidStateError( - __FILE__, __LINE__, __func__, - "Requires either a generated solver or an solver factory"); } else { - throw ::gko::InvalidStateError( - __FILE__, __LINE__, __func__, - "Provided both a generated solver and a solver factory"); + this->local_solver_ = parameters_.generated_local_solver; } } diff --git a/test/mpi/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp index 7a1f69a59a3..42a043d2e51 100644 --- a/test/mpi/preconditioner/schwarz.cpp +++ b/test/mpi/preconditioner/schwarz.cpp @@ -178,65 +178,32 @@ class SchwarzPreconditioner : public CommonMpiTestFixture { TYPED_TEST_SUITE(SchwarzPreconditioner, gko::test::ValueLocalGlobalIndexTypes, TupleTypenameNameGenerator); - -TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolver) +TYPED_TEST(SchwarzPreconditioner, GenerateFailsIfInvalidState) { using value_type = typename TestFixture::value_type; - using csr = typename TestFixture::local_matrix_type; - using cg = typename TestFixture::solver_type; - using prec = typename TestFixture::dist_prec_type; - constexpr double tolerance = 1e-20; - auto iter_stop = gko::share( - gko::stop::Iteration::build().with_max_iters(200u).on(this->exec)); - auto tol_stop = gko::share( - gko::stop::ResidualNorm::build() - .with_reduction_factor( - static_cast>(tolerance)) - .on(this->exec)); - this->dist_solver_factory = - cg::build() - .with_preconditioner( - prec::build() - .with_local_solver(this->local_solver_factory) - .on(this->exec)) - .with_criteria(iter_stop, tol_stop) - .on(this->exec); - auto dist_solver = this->dist_solver_factory->generate(this->dist_mat); - this->non_dist_solver_factory = - cg::build() - .with_preconditioner(this->local_solver_factory) - .with_criteria(iter_stop, tol_stop) - .on(this->exec); - auto non_dist_solver = - this->non_dist_solver_factory->generate(this->non_dist_mat); - - dist_solver->apply(this->dist_b.get(), this->dist_x.get()); - non_dist_solver->apply(this->non_dist_b.get(), this->non_dist_x.get()); - - this->assert_equal_to_non_distributed_vector(this->dist_x, - this->non_dist_x); -} - - -TYPED_TEST(SchwarzPreconditioner, GenerateFailsIfPregenSolverAndSolverFactoryArePresent) -{ + using local_index_type = typename TestFixture::local_index_type; + using local_prec_type = + gko::preconditioner::Jacobi; using prec = typename TestFixture::dist_prec_type; - auto local_solver = - 
gko::share(this->non_dist_solver_factory->generate(this->non_dist_mat)); + auto local_solver = gko::share(local_prec_type::build() + .with_max_block_size(1u) + .on(this->exec) + ->generate(this->non_dist_mat)); auto schwarz = prec::build() - .with_local_solver(this->local_solver_factory) - .with_generated_local_solver(local_solver) - .on(this->exec); + .with_local_solver(this->local_solver_factory) + .with_generated_local_solver(local_solver) + .on(this->exec); ASSERT_THROW(schwarz->generate(this->dist_mat), gko::InvalidStateError); auto schwarz_no_solver = prec::build().on(this->exec); - ASSERT_THROW(schwarz_no_solver->generate(this->dist_mat), gko::InvalidStateError); + ASSERT_THROW(schwarz_no_solver->generate(this->dist_mat), + gko::InvalidStateError); } -// TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolverWithPregenSolver) +// TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolver) // { // using value_type = typename TestFixture::value_type; // using csr = typename TestFixture::local_matrix_type; @@ -250,30 +217,72 @@ TYPED_TEST(SchwarzPreconditioner, GenerateFailsIfPregenSolverAndSolverFactoryAre // .with_reduction_factor( // static_cast>(tolerance)) // .on(this->exec)); -// this->non_dist_solver_factory = +// this->dist_solver_factory = // cg::build() -// .with_preconditioner(this->local_solver_factory) +// .with_preconditioner( +// prec::build() +// .with_local_solver(this->local_solver_factory) +// .on(this->exec)) // .with_criteria(iter_stop, tol_stop) // .on(this->exec); -// auto local_solver = -// this->non_dist_solver_factory->generate(this->non_dist_mat); -// this->dist_solver_factory = +// auto dist_solver = this->dist_solver_factory->generate(this->dist_mat); +// this->non_dist_solver_factory = // cg::build() -// .with_preconditioner(prec::build() -// .with_generated_local_solver(local_solver.get()) -// .on(this->exec)) +// .with_preconditioner(this->local_solver_factory) // .with_criteria(iter_stop, tol_stop) // .on(this->exec); +// auto non_dist_solver = +// this->non_dist_solver_factory->generate(this->non_dist_mat); +// +// dist_solver->apply(this->dist_b.get(), this->dist_x.get()); +// dist_solver->apply(this->non_dist_b.get(), this->non_dist_x.get()); +// +// this->assert_equal_to_non_distributed_vector(this->dist_x, +// this->non_dist_x); // } -TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditioner) +TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolverWithPregenSolver) { using value_type = typename TestFixture::value_type; + using local_index_type = typename TestFixture::local_index_type; + using local_prec_type = + gko::preconditioner::Jacobi; using csr = typename TestFixture::local_matrix_type; using cg = typename TestFixture::solver_type; using prec = typename TestFixture::dist_prec_type; + auto local_solver = gko::share(local_prec_type::build() + .with_max_block_size(1u) + .on(this->exec) + ->generate(this->non_dist_mat)); + auto precond = prec::build() + .with_local_solver(this->local_solver_factory) + .on(this->exec) + ->generate(this->dist_mat); + + auto precond_pregen = prec::build() + .with_generated_local_solver(local_solver) + .on(this->exec) + ->generate(this->dist_mat); + + auto dist_x = gko::share(this->dist_x->clone()); + auto dist_x_pregen = gko::share(this->dist_x->clone()); + + precond->apply(this->dist_b.get(), dist_x.get()); + precond->apply(this->dist_b.get(), dist_x_pregen.get()); + + GKO_ASSERT_MTX_NEAR( + dist_x->get_local_vector(), dist_x_pregen->get_local_vector(), + r::value); +} + + 
+TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditioner) +{ + using value_type = typename TestFixture::value_type; + using prec = typename TestFixture::dist_prec_type; + auto precond_factory = prec::build() .with_local_solver(this->local_solver_factory) .on(this->exec); From 7ff372885d31e7726c1ed9fcf95906138e22bb75 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Tue, 17 Oct 2023 16:20:14 +0200 Subject: [PATCH 423/583] add missing test --- test/mpi/preconditioner/schwarz.cpp | 74 ++++++++++++++--------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/test/mpi/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp index 42a043d2e51..2241be8f535 100644 --- a/test/mpi/preconditioner/schwarz.cpp +++ b/test/mpi/preconditioner/schwarz.cpp @@ -203,43 +203,43 @@ TYPED_TEST(SchwarzPreconditioner, GenerateFailsIfInvalidState) } -// TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolver) -// { -// using value_type = typename TestFixture::value_type; -// using csr = typename TestFixture::local_matrix_type; -// using cg = typename TestFixture::solver_type; -// using prec = typename TestFixture::dist_prec_type; -// constexpr double tolerance = 1e-20; -// auto iter_stop = gko::share( -// gko::stop::Iteration::build().with_max_iters(200u).on(this->exec)); -// auto tol_stop = gko::share( -// gko::stop::ResidualNorm::build() -// .with_reduction_factor( -// static_cast>(tolerance)) -// .on(this->exec)); -// this->dist_solver_factory = -// cg::build() -// .with_preconditioner( -// prec::build() -// .with_local_solver(this->local_solver_factory) -// .on(this->exec)) -// .with_criteria(iter_stop, tol_stop) -// .on(this->exec); -// auto dist_solver = this->dist_solver_factory->generate(this->dist_mat); -// this->non_dist_solver_factory = -// cg::build() -// .with_preconditioner(this->local_solver_factory) -// .with_criteria(iter_stop, tol_stop) -// .on(this->exec); -// auto non_dist_solver = -// this->non_dist_solver_factory->generate(this->non_dist_mat); -// -// dist_solver->apply(this->dist_b.get(), this->dist_x.get()); -// dist_solver->apply(this->non_dist_b.get(), this->non_dist_x.get()); -// -// this->assert_equal_to_non_distributed_vector(this->dist_x, -// this->non_dist_x); -// } +TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolver) +{ + using value_type = typename TestFixture::value_type; + using csr = typename TestFixture::local_matrix_type; + using cg = typename TestFixture::solver_type; + using prec = typename TestFixture::dist_prec_type; + constexpr double tolerance = 1e-20; + auto iter_stop = gko::share( + gko::stop::Iteration::build().with_max_iters(200u).on(this->exec)); + auto tol_stop = gko::share( + gko::stop::ResidualNorm::build() + .with_reduction_factor( + static_cast>(tolerance)) + .on(this->exec)); + this->dist_solver_factory = + cg::build() + .with_preconditioner( + prec::build() + .with_local_solver(this->local_solver_factory) + .on(this->exec)) + .with_criteria(iter_stop, tol_stop) + .on(this->exec); + auto dist_solver = this->dist_solver_factory->generate(this->dist_mat); + this->non_dist_solver_factory = + cg::build() + .with_preconditioner(this->local_solver_factory) + .with_criteria(iter_stop, tol_stop) + .on(this->exec); + auto non_dist_solver = + this->non_dist_solver_factory->generate(this->non_dist_mat); + + dist_solver->apply(this->dist_b.get(), this->dist_x.get()); + non_dist_solver->apply(this->non_dist_b.get(), this->non_dist_x.get()); + + this->assert_equal_to_non_distributed_vector(this->dist_x, + this->non_dist_x); +} 
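The two configuration paths exercised by these tests can be summarized in a minimal usage sketch. It assumes the fixture aliases used above (`prec` for the distributed Schwarz type, `local_prec_type` for the block-Jacobi local preconditioner) together with `exec`, `dist_mat` and `local_solver_factory` from the test fixture; exactly one of the two local-solver parameters may be set, otherwise generate() throws gko::InvalidStateError.

    // 1) let Schwarz generate the local solver from a factory:
    auto schwarz = prec::build()
                       .with_local_solver(local_solver_factory)
                       .on(exec)
                       ->generate(dist_mat);

    // 2) pass an already generated local solver instead:
    auto local_solver = gko::share(local_prec_type::build()
                                       .with_max_block_size(1u)
                                       .on(exec)
                                       ->generate(dist_mat->get_local_matrix()));
    auto schwarz_pregen = prec::build()
                              .with_generated_local_solver(local_solver)
                              .on(exec)
                              ->generate(dist_mat);

    // setting both options, or neither, is the invalid state checked above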
TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolverWithPregenSolver) From fc17c0b8df56dcbf21b11ad60320592187e5115d Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 18 Oct 2023 10:21:46 +0200 Subject: [PATCH 424/583] Implement review comments Co-authored-by: Pratik Nayak --- core/distributed/preconditioner/schwarz.cpp | 12 ++++-------- .../core/distributed/preconditioner/schwarz.hpp | 1 + test/mpi/preconditioner/schwarz.cpp | 13 ++++++++----- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/core/distributed/preconditioner/schwarz.cpp b/core/distributed/preconditioner/schwarz.cpp index 7dfdfd3b4a7..dd3f86a1cd9 100644 --- a/core/distributed/preconditioner/schwarz.cpp +++ b/core/distributed/preconditioner/schwarz.cpp @@ -102,17 +102,13 @@ template void Schwarz::generate( std::shared_ptr system_matrix) { - if (parameters_.local_solver != nullptr && - parameters_.generated_local_solver != nullptr) { - throw ::gko::InvalidStateError( - __FILE__, __LINE__, __func__, + if (parameters_.local_solver && parameters_.generated_local_solver) { + GKO_INVALID_STATE( "Provided both a generated solver and a solver factory"); } - if (parameters_.local_solver == nullptr && - parameters_.generated_local_solver == nullptr) { - throw ::gko::InvalidStateError( - __FILE__, __LINE__, __func__, + if (!parameters_.local_solver && !parameters_.generated_local_solver) { + GKO_INVALID_STATE( "Requires either a generated solver or an solver factory"); } diff --git a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp index 5bce97fb414..1b34faff7c4 100644 --- a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp +++ b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp @@ -95,6 +95,7 @@ class Schwarz * Local solver factory. */ GKO_DEFERRED_FACTORY_PARAMETER(local_solver, LinOpFactory); + /** * Generated Inner solvers. 
*/ diff --git a/test/mpi/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp index 2241be8f535..506a8d1320f 100644 --- a/test/mpi/preconditioner/schwarz.cpp +++ b/test/mpi/preconditioner/schwarz.cpp @@ -196,7 +196,12 @@ TYPED_TEST(SchwarzPreconditioner, GenerateFailsIfInvalidState) .on(this->exec); ASSERT_THROW(schwarz->generate(this->dist_mat), gko::InvalidStateError); +} + +TYPED_TEST(SchwarzPreconditioner, GenerateFailsIfNoSolverProvided) +{ + using prec = typename TestFixture::dist_prec_type; auto schwarz_no_solver = prec::build().on(this->exec); ASSERT_THROW(schwarz_no_solver->generate(this->dist_mat), gko::InvalidStateError); @@ -260,21 +265,19 @@ TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolverWithPregenSolver) .with_local_solver(this->local_solver_factory) .on(this->exec) ->generate(this->dist_mat); - auto precond_pregen = prec::build() .with_generated_local_solver(local_solver) .on(this->exec) ->generate(this->dist_mat); - auto dist_x = gko::share(this->dist_x->clone()); auto dist_x_pregen = gko::share(this->dist_x->clone()); precond->apply(this->dist_b.get(), dist_x.get()); precond->apply(this->dist_b.get(), dist_x_pregen.get()); - GKO_ASSERT_MTX_NEAR( - dist_x->get_local_vector(), dist_x_pregen->get_local_vector(), - r::value); + GKO_ASSERT_MTX_NEAR(dist_x->get_local_vector(), + dist_x_pregen->get_local_vector(), + r::value); } From b345caa5df30b10d97044229a7dc9afff872e8e9 Mon Sep 17 00:00:00 2001 From: Gregor Olenik Date: Wed, 18 Oct 2023 14:52:41 +0200 Subject: [PATCH 425/583] Add review suggestions Co-authored-by: Yuhsiang Tsai --- core/distributed/preconditioner/schwarz.cpp | 26 ++++++++++++++----- .../distributed/preconditioner/schwarz.hpp | 10 +++++-- test/mpi/preconditioner/schwarz.cpp | 5 ++-- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/core/distributed/preconditioner/schwarz.cpp b/core/distributed/preconditioner/schwarz.cpp index dd3f86a1cd9..45536c9df87 100644 --- a/core/distributed/preconditioner/schwarz.cpp +++ b/core/distributed/preconditioner/schwarz.cpp @@ -98,6 +98,20 @@ void Schwarz::apply_impl( } +template +void Schwarz::set_solver( + std::shared_ptr new_solver) +{ + auto exec = this->get_executor(); + if (new_solver) { + if (new_solver->get_executor() != exec) { + new_solver = gko::clone(exec, new_solver); + } + } + this->local_solver_ = new_solver; +} + + template void Schwarz::generate( std::shared_ptr system_matrix) @@ -113,13 +127,13 @@ void Schwarz::generate( } if (parameters_.local_solver) { - this->local_solver_ = parameters_.local_solver->generate( - as>( - system_matrix) - ->get_local_matrix()); + this->set_solver(gko::share(parameters_.local_solver->generate( + as>(system_matrix) + ->get_local_matrix()))); + } else { - this->local_solver_ = parameters_.generated_local_solver; + this->set_solver(parameters_.generated_local_solver); } } diff --git a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp index 1b34faff7c4..e7cd2b1d471 100644 --- a/include/ginkgo/core/distributed/preconditioner/schwarz.hpp +++ b/include/ginkgo/core/distributed/preconditioner/schwarz.hpp @@ -99,7 +99,7 @@ class Schwarz /** * Generated Inner solvers. 
*/ - std::shared_ptr GKO_FACTORY_PARAMETER( + std::shared_ptr GKO_FACTORY_PARAMETER_SCALAR( generated_local_solver, nullptr); }; GKO_ENABLE_LIN_OP_FACTORY(Schwarz, parameters, Factory); @@ -136,7 +136,6 @@ class Schwarz */ void generate(std::shared_ptr system_matrix); - void apply_impl(const LinOp* b, LinOp* x) const override; template @@ -146,6 +145,13 @@ class Schwarz LinOp* x) const override; private: + /** + * Sets the solver operator used as the local solver. + * + * @param new_solver the new local solver + */ + void set_solver(std::shared_ptr new_solver); + std::shared_ptr local_solver_; }; diff --git a/test/mpi/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp index 506a8d1320f..f0181cad39a 100644 --- a/test/mpi/preconditioner/schwarz.cpp +++ b/test/mpi/preconditioner/schwarz.cpp @@ -203,6 +203,7 @@ TYPED_TEST(SchwarzPreconditioner, GenerateFailsIfNoSolverProvided) { using prec = typename TestFixture::dist_prec_type; auto schwarz_no_solver = prec::build().on(this->exec); + ASSERT_THROW(schwarz_no_solver->generate(this->dist_mat), gko::InvalidStateError); } @@ -260,7 +261,7 @@ TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolverWithPregenSolver) auto local_solver = gko::share(local_prec_type::build() .with_max_block_size(1u) .on(this->exec) - ->generate(this->non_dist_mat)); + ->generate(this->dist_mat->get_local_matrix())); auto precond = prec::build() .with_local_solver(this->local_solver_factory) .on(this->exec) @@ -273,7 +274,7 @@ TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolverWithPregenSolver) auto dist_x_pregen = gko::share(this->dist_x->clone()); precond->apply(this->dist_b.get(), dist_x.get()); - precond->apply(this->dist_b.get(), dist_x_pregen.get()); + precond_pregen->apply(this->dist_b.get(), dist_x_pregen.get()); GKO_ASSERT_MTX_NEAR(dist_x->get_local_vector(), dist_x_pregen->get_local_vector(), From bd5fc17f80cfa2a965c60784ab6e73c063e04dfd Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Mon, 23 Oct 2023 08:05:19 +0000 Subject: [PATCH 426/583] Format files Co-authored-by: Gregor Olenik --- test/mpi/preconditioner/schwarz.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/mpi/preconditioner/schwarz.cpp b/test/mpi/preconditioner/schwarz.cpp index f0181cad39a..3c6dbf33a52 100644 --- a/test/mpi/preconditioner/schwarz.cpp +++ b/test/mpi/preconditioner/schwarz.cpp @@ -258,10 +258,11 @@ TYPED_TEST(SchwarzPreconditioner, CanApplyPreconditionedSolverWithPregenSolver) using cg = typename TestFixture::solver_type; using prec = typename TestFixture::dist_prec_type; - auto local_solver = gko::share(local_prec_type::build() - .with_max_block_size(1u) - .on(this->exec) - ->generate(this->dist_mat->get_local_matrix())); + auto local_solver = + gko::share(local_prec_type::build() + .with_max_block_size(1u) + .on(this->exec) + ->generate(this->dist_mat->get_local_matrix())); auto precond = prec::build() .with_local_solver(this->local_solver_factory) .on(this->exec) From a5942fac01104400524ff1aecf57401671d6515e Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. 
Tsai" Date: Wed, 18 Oct 2023 10:15:26 +0200 Subject: [PATCH 427/583] add dpcpp csr diagonal missing components - check_diagonal_entries - add_scaled_identity --- dpcpp/matrix/csr_kernels.dp.cpp | 102 +++++++++++++++++++++++++++++++- test/matrix/csr_kernels2.cpp | 6 -- 2 files changed, 99 insertions(+), 9 deletions(-) diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 11309b67b9b..c5a8e3ef4d4 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -871,6 +871,74 @@ void extract_diagonal(size_type diag_size, size_type nnz, GKO_ENABLE_DEFAULT_HOST(extract_diagonal, extract_diagonal); +template +void check_diagonal_entries(const IndexType num_min_rows_cols, + const IndexType* const __restrict__ row_ptrs, + const IndexType* const __restrict__ col_idxs, + bool* const __restrict__ has_all_diags, + sycl::nd_item<3> item_ct1) +{ + constexpr int subgroup_size = config::warp_size; + auto tile_grp = group::tiled_partition( + group::this_thread_block(item_ct1)); + const auto row = + thread::get_subwarp_id_flat(item_ct1); + if (row < num_min_rows_cols) { + const auto tid_in_warp = tile_grp.thread_rank(); + const auto row_start = row_ptrs[row]; + const auto num_nz = row_ptrs[row + 1] - row_start; + bool row_has_diag_local{false}; + for (IndexType iz = tid_in_warp; iz < num_nz; iz += subgroup_size) { + if (col_idxs[iz + row_start] == row) { + row_has_diag_local = true; + break; + } + } + auto row_has_diag = static_cast(tile_grp.any(row_has_diag_local)); + if (!row_has_diag) { + if (tile_grp.thread_rank() == 0) { + *has_all_diags = false; + } + return; + } + } +} + +GKO_ENABLE_DEFAULT_HOST(check_diagonal_entries, check_diagonal_entries); + + +template +void add_scaled_identity(const ValueType* const __restrict__ alpha, + const ValueType* const __restrict__ beta, + const IndexType num_rows, + const IndexType* const __restrict__ row_ptrs, + const IndexType* const __restrict__ col_idxs, + ValueType* const __restrict__ values, + sycl::nd_item<3> item_ct1) +{ + constexpr int subgroup_size = config::warp_size; + auto tile_grp = group::tiled_partition( + group::this_thread_block(item_ct1)); + const auto row = + thread::get_subwarp_id_flat(item_ct1); + const auto num_warps = + thread::get_subwarp_num_flat(item_ct1); + if (row < num_rows) { + const auto tid_in_warp = tile_grp.thread_rank(); + const auto row_start = row_ptrs[row]; + const auto num_nz = row_ptrs[row + 1] - row_start; + for (IndexType iz = tid_in_warp; iz < num_nz; iz += subgroup_size) { + values[iz + row_start] *= beta[0]; + if (col_idxs[iz + row_start] == row) { + values[iz + row_start] += alpha[0]; + } + } + } +} + +GKO_ENABLE_DEFAULT_HOST(add_scaled_identity, add_scaled_identity); + + } // namespace kernel @@ -2364,8 +2432,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); template void check_diagonal_entries_exist( std::shared_ptr exec, - const matrix::Csr* const mtx, - bool& has_all_diags) GKO_NOT_IMPLEMENTED; + const matrix::Csr* const mtx, bool& has_all_diags) +{ + const size_type num_subgroup = mtx->get_size()[0]; + if (num_subgroup > 0) { + const size_type num_blocks = + num_subgroup / (default_block_size / config::warp_size); + array has_diags(exec, {true}); + kernel::check_diagonal_entries( + num_blocks, default_block_size, 0, exec->get_queue(), + static_cast( + std::min(mtx->get_size()[0], mtx->get_size()[1])), + mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), + has_diags.get_data()); + has_all_diags = 
exec->copy_val_to_host(has_diags.get_const_data()); + } else { + has_all_diags = true; + } +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST); @@ -2376,7 +2460,19 @@ void add_scaled_identity(std::shared_ptr exec, const matrix::Dense* const alpha, const matrix::Dense* const beta, matrix::Csr* const mtx) - GKO_NOT_IMPLEMENTED; +{ + const auto nrows = mtx->get_size()[0]; + if (nrows == 0) { + return; + } + const auto nthreads = nrows * config::warp_size; + const auto nblocks = ceildiv(nthreads, default_block_size); + kernel::add_scaled_identity( + nblocks, default_block_size, 0, exec->get_queue(), + alpha->get_const_values(), beta->get_const_values(), + static_cast(nrows), mtx->get_const_row_ptrs(), + mtx->get_const_col_idxs(), mtx->get_values()); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); diff --git a/test/matrix/csr_kernels2.cpp b/test/matrix/csr_kernels2.cpp index 4d3ffa61323..412f9a41158 100644 --- a/test/matrix/csr_kernels2.cpp +++ b/test/matrix/csr_kernels2.cpp @@ -1311,9 +1311,6 @@ TEST_F(Csr, CreateSubMatrixIsEquivalentToRef) } -#ifndef GKO_COMPILING_DPCPP - - TEST_F(Csr, CanDetectMissingDiagonalEntry) { using T = double; @@ -1359,6 +1356,3 @@ TEST_F(Csr, AddScaledIdentityToNonSquare) GKO_ASSERT_MTX_NEAR(mtx, dmtx, r::value); } - - -#endif // GKO_COMPILING_DPCPP From f78461435be15346d54c534cdf4c92bbc3b35469 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Mon, 23 Oct 2023 22:50:16 +0200 Subject: [PATCH 428/583] refine the kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Pratik Nayak Co-authored-by: Thomas Grützmacher --- common/cuda_hip/matrix/csr_common.hpp.inc | 1 - common/cuda_hip/matrix/csr_kernels.hpp.inc | 12 ++++++++---- dpcpp/matrix/csr_kernels.dp.cpp | 14 ++++++++------ omp/matrix/csr_kernels.cpp | 11 ++++++++--- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/common/cuda_hip/matrix/csr_common.hpp.inc b/common/cuda_hip/matrix/csr_common.hpp.inc index 0fce02aecfa..35718464c42 100644 --- a/common/cuda_hip/matrix/csr_common.hpp.inc +++ b/common/cuda_hip/matrix/csr_common.hpp.inc @@ -102,7 +102,6 @@ __global__ __launch_bounds__(default_block_size) void check_diagonal_entries( if (tile_grp.thread_rank() == 0) { *has_all_diags = false; } - return; } } } diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 3f02337747e..4bc601c5067 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -826,15 +826,19 @@ __global__ __launch_bounds__(default_block_size) void add_scaled_identity( auto tile_grp = group::tiled_partition(group::this_thread_block()); const auto warpid = thread::get_subwarp_id_flat(); - const auto num_warps = thread::get_subwarp_num_flat(); if (warpid < num_rows) { const auto tid_in_warp = tile_grp.thread_rank(); const IndexType row_start = row_ptrs[warpid]; const IndexType num_nz = row_ptrs[warpid + 1] - row_start; + const auto beta_val = beta[0]; + const auto alpha_val = alpha[0]; for (IndexType iz = tid_in_warp; iz < num_nz; iz += warp_size) { - values[iz + row_start] *= beta[0]; - if (col_idxs[iz + row_start] == warpid) { - values[iz + row_start] += alpha[0]; + if (beta_val != one()) { + values[iz + row_start] *= beta_val; + } + if (col_idxs[iz + row_start] == warpid && + alpha_val != zero()) { + values[iz + row_start] += alpha_val; } } } diff --git a/dpcpp/matrix/csr_kernels.dp.cpp 
b/dpcpp/matrix/csr_kernels.dp.cpp index c5a8e3ef4d4..915e2027a26 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -899,7 +899,6 @@ void check_diagonal_entries(const IndexType num_min_rows_cols, if (tile_grp.thread_rank() == 0) { *has_all_diags = false; } - return; } } } @@ -921,16 +920,19 @@ void add_scaled_identity(const ValueType* const __restrict__ alpha, group::this_thread_block(item_ct1)); const auto row = thread::get_subwarp_id_flat(item_ct1); - const auto num_warps = - thread::get_subwarp_num_flat(item_ct1); if (row < num_rows) { const auto tid_in_warp = tile_grp.thread_rank(); const auto row_start = row_ptrs[row]; const auto num_nz = row_ptrs[row + 1] - row_start; + const auto beta_val = beta[0]; + const auto alpha_val = alpha[0]; for (IndexType iz = tid_in_warp; iz < num_nz; iz += subgroup_size) { - values[iz + row_start] *= beta[0]; - if (col_idxs[iz + row_start] == row) { - values[iz + row_start] += alpha[0]; + if (beta_val != one()) { + values[iz + row_start] *= beta_val; + } + if (col_idxs[iz + row_start] == row && + alpha_val != zero()) { + values[iz + row_start] += alpha_val; } } } diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 7d4a5a7ebd1..1757b4b8a25 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -1134,12 +1134,17 @@ void add_scaled_identity(std::shared_ptr exec, const auto nrows = static_cast(mtx->get_size()[0]); const auto row_ptrs = mtx->get_const_row_ptrs(); const auto vals = mtx->get_values(); + const auto beta_val = beta->get_const_values()[0]; + const auto alpha_val = alpha->get_const_values()[0]; #pragma omp parallel for for (IndexType row = 0; row < nrows; row++) { for (IndexType iz = row_ptrs[row]; iz < row_ptrs[row + 1]; iz++) { - vals[iz] *= beta->get_const_values()[0]; - if (row == mtx->get_const_col_idxs()[iz]) { - vals[iz] += alpha->get_const_values()[0]; + if (beta_val != one()) { + vals[iz] *= beta_val; + } + if (row == mtx->get_const_col_idxs()[iz] && + alpha_val != zero()) { + vals[iz] += alpha_val; } } } From 6444d965fcf1e9b3ea3262cbede49281f314d1d6 Mon Sep 17 00:00:00 2001 From: "Jayesh Badwaik (FZ Juelich)" Date: Wed, 30 Aug 2023 12:46:38 +0200 Subject: [PATCH 429/583] Fix memory_order invocations to be inline with C++20 changes --- omp/reorder/rcm_kernels.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/omp/reorder/rcm_kernels.cpp b/omp/reorder/rcm_kernels.cpp index c0042224b3c..4de58456cc1 100644 --- a/omp/reorder/rcm_kernels.cpp +++ b/omp/reorder/rcm_kernels.cpp @@ -235,8 +235,8 @@ struct UbfsLinearQueue { #define GKO_CMPXCHG_IMPL(ptr, ptr_expected, replace_with) \ return __atomic_compare_exchange_n( \ ptr, ptr_expected, replace_with, true, \ - std::memory_order::memory_order_acq_rel, \ - std::memory_order::memory_order_acquire); + static_cast(std::memory_order_acq_rel), \ + static_cast(std::memory_order_acquire)); #endif /** From cec594be8dfb14ea9b740c8290d31477196bcca8 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. 
Tsai" Date: Tue, 24 Oct 2023 21:46:40 +0200 Subject: [PATCH 430/583] failed: missing diag in tail and ensure all diag --- core/test/utils/matrix_utils_test.cpp | 41 ++++++++++++++++----------- core/utils/matrix_utils.hpp | 7 +++-- test/matrix/csr_kernels2.cpp | 6 ++-- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/core/test/utils/matrix_utils_test.cpp b/core/test/utils/matrix_utils_test.cpp index 31a6072270e..5c1653f22dc 100644 --- a/core/test/utils/matrix_utils_test.cpp +++ b/core/test/utils/matrix_utils_test.cpp @@ -355,29 +355,38 @@ TEST(MatrixUtils, ModifyToEnsureAllDiagonalEntries) using T = float; using Csr = gko::matrix::Csr; auto exec = gko::ReferenceExecutor::create(); + auto check_all_diag = [](const Csr* csr) { + const auto rowptrs = csr->get_const_row_ptrs(); + const auto colidxs = csr->get_const_col_idxs(); + const auto ndiag = + static_cast(std::min(csr->get_size()[0], csr->get_size()[1])); + bool all_diags = true; + for (int i = 0; i < ndiag; i++) { + bool has_diag = false; + for (int j = rowptrs[i]; j < rowptrs[i + 1]; j++) { + if (colidxs[j] == i) { + has_diag = true; + break; + } + } + if (!has_diag) { + all_diags = false; + break; + } + } + return all_diags; + }; auto b = gko::initialize( {I{2.0, 0.0, 1.1, 0.0}, I{1.0, 2.4, 0.0, -1.0}, I{0.0, -4.0, 2.2, -2.0}, I{0.0, -3.0, 1.5, 1.0}}, exec); + // ensure it misses some diag + bool prev_check = check_all_diag(b.get()); gko::utils::ensure_all_diagonal_entries(b.get()); - const auto rowptrs = b->get_const_row_ptrs(); - const auto colidxs = b->get_const_col_idxs(); - bool all_diags = true; - for (int i = 0; i < 3; i++) { - bool has_diag = false; - for (int j = rowptrs[i]; j < rowptrs[i + 1]; j++) { - if (colidxs[j] == i) { - has_diag = true; - } - } - if (!has_diag) { - all_diags = false; - break; - } - } - ASSERT_TRUE(all_diags); + ASSERT_FALSE(prev_check); + ASSERT_TRUE(check_all_diag(b.get())); } diff --git a/core/utils/matrix_utils.hpp b/core/utils/matrix_utils.hpp index fed92ad73ef..65b610d1a1d 100644 --- a/core/utils/matrix_utils.hpp +++ b/core/utils/matrix_utils.hpp @@ -301,9 +301,10 @@ void ensure_all_diagonal_entries(MtxType* mtx) using index_type = typename MtxType::index_type; matrix_data mdata; mtx->write(mdata); - const auto nrows = static_cast(mtx->get_size()[0]); - mdata.nonzeros.reserve(mtx->get_num_stored_elements() + nrows); - for (index_type i = 0; i < nrows; i++) { + const auto ndiag = static_cast( + std::min(mtx->get_size()[0], mtx->get_size()[1])); + mdata.nonzeros.reserve(mtx->get_num_stored_elements() + ndiag); + for (index_type i = 0; i < ndiag; i++) { mdata.nonzeros.push_back({i, i, zero()}); } mdata.sum_duplicates(); diff --git a/test/matrix/csr_kernels2.cpp b/test/matrix/csr_kernels2.cpp index 412f9a41158..84b1335c675 100644 --- a/test/matrix/csr_kernels2.cpp +++ b/test/matrix/csr_kernels2.cpp @@ -1315,10 +1315,12 @@ TEST_F(Csr, CanDetectMissingDiagonalEntry) { using T = double; using Csr = Mtx; - auto ref_mtx = gen_mtx(103, 98, 10); + auto ref_mtx = gen_mtx(103, 104, 10); const auto rowptrs = ref_mtx->get_row_ptrs(); const auto colidxs = ref_mtx->get_col_idxs(); - const int testrow = 15; + gko::utils::ensure_all_diagonal_entries(ref_mtx.get()); + // Choose the last row to ensure that kernel assign enough work + const int testrow = 102; gko::utils::remove_diagonal_entry_from_row(ref_mtx.get(), testrow); auto mtx = gko::clone(exec, ref_mtx); bool has_diags = true; From 04449bb90a6f4b33cf8f6913b9edadf516dd290b Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. 
Tsai" Date: Tue, 24 Oct 2023 22:06:06 +0200 Subject: [PATCH 431/583] enough work for check and initial non-full-diag ex --- core/test/utils/matrix_utils_test.cpp | 7 +++---- cuda/matrix/csr_kernels.template.cu | 13 ++++++------- dpcpp/matrix/csr_kernels.dp.cpp | 13 ++++++------- hip/matrix/csr_kernels.template.hip.cpp | 13 ++++++------- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/core/test/utils/matrix_utils_test.cpp b/core/test/utils/matrix_utils_test.cpp index 5c1653f22dc..cc5ed70966d 100644 --- a/core/test/utils/matrix_utils_test.cpp +++ b/core/test/utils/matrix_utils_test.cpp @@ -376,10 +376,9 @@ TEST(MatrixUtils, ModifyToEnsureAllDiagonalEntries) } return all_diags; }; - auto b = gko::initialize( - {I{2.0, 0.0, 1.1, 0.0}, I{1.0, 2.4, 0.0, -1.0}, - I{0.0, -4.0, 2.2, -2.0}, I{0.0, -3.0, 1.5, 1.0}}, - exec); + auto b = gko::initialize({I{2.0, 0.0, 1.1}, I{1.0, 0.0, 0.0}, + I{0.0, -4.0, 2.2}, I{0.0, -3.0, 1.5}}, + exec); // ensure it misses some diag bool prev_check = check_all_diag(b.get()); diff --git a/cuda/matrix/csr_kernels.template.cu b/cuda/matrix/csr_kernels.template.cu index 1b4b20a1e75..803cb530262 100644 --- a/cuda/matrix/csr_kernels.template.cu +++ b/cuda/matrix/csr_kernels.template.cu @@ -1322,16 +1322,15 @@ void check_diagonal_entries_exist( std::shared_ptr exec, const matrix::Csr* const mtx, bool& has_all_diags) { - const size_type num_warps = mtx->get_size()[0]; - if (num_warps > 0) { - const size_type num_blocks = - num_warps / (default_block_size / config::warp_size); + const auto num_diag = static_cast( + std::min(mtx->get_size()[0], mtx->get_size()[1])); + if (num_diag > 0) { + const IndexType num_blocks = + ceildiv(num_diag, default_block_size / config::warp_size); array has_diags(exec, {true}); kernel::check_diagonal_entries<<get_stream()>>>( - static_cast( - std::min(mtx->get_size()[0], mtx->get_size()[1])), - mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), + num_diag, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), has_diags.get_data()); has_all_diags = exec->copy_val_to_host(has_diags.get_const_data()); } else { diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 915e2027a26..46e8894fdac 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -2436,15 +2436,14 @@ void check_diagonal_entries_exist( std::shared_ptr exec, const matrix::Csr* const mtx, bool& has_all_diags) { - const size_type num_subgroup = mtx->get_size()[0]; - if (num_subgroup > 0) { - const size_type num_blocks = - num_subgroup / (default_block_size / config::warp_size); + const auto num_diag = static_cast( + std::min(mtx->get_size()[0], mtx->get_size()[1])); + if (num_diag > 0) { + const IndexType num_blocks = + ceildiv(num_diag, default_block_size / config::warp_size); array has_diags(exec, {true}); kernel::check_diagonal_entries( - num_blocks, default_block_size, 0, exec->get_queue(), - static_cast( - std::min(mtx->get_size()[0], mtx->get_size()[1])), + num_blocks, default_block_size, 0, exec->get_queue(), num_diag, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), has_diags.get_data()); has_all_diags = exec->copy_val_to_host(has_diags.get_const_data()); diff --git a/hip/matrix/csr_kernels.template.hip.cpp b/hip/matrix/csr_kernels.template.hip.cpp index e6a4fb64041..5e4de7b9699 100644 --- a/hip/matrix/csr_kernels.template.hip.cpp +++ b/hip/matrix/csr_kernels.template.hip.cpp @@ -1119,16 +1119,15 @@ void check_diagonal_entries_exist( std::shared_ptr exec, const matrix::Csr* const mtx, bool& 
has_all_diags) { - const size_type num_warps = mtx->get_size()[0]; - if (num_warps > 0) { - const size_type num_blocks = - num_warps / (default_block_size / config::warp_size); + const auto num_diag = static_cast( + std::min(mtx->get_size()[0], mtx->get_size()[1])); + if (num_diag > 0) { + const IndexType num_blocks = + ceildiv(num_diag, default_block_size / config::warp_size); array has_diags(exec, {true}); kernel::check_diagonal_entries<<get_stream()>>>( - static_cast( - std::min(mtx->get_size()[0], mtx->get_size()[1])), - mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), + num_diag, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), has_diags.get_data()); has_all_diags = exec->copy_val_to_host(has_diags.get_const_data()); } else { From ad4d2bbe6598e31f6f44e168504ad2580f0f2d10 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 3 Sep 2023 08:12:10 +0200 Subject: [PATCH 432/583] improve-doc --- include/ginkgo/core/base/lin_op.hpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index 407fafda0d1..531163e6c94 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -520,6 +520,9 @@ class Permutable { * In the resulting LinOp, the entry at location `(i,j)` contains the input * value `(perm[i],perm[j])`. * + * From the linear algebra perspective, with \f$P_{ij} = \delta_{i + * \pi(i)}\f$, this represents the operation \f$P A P^{-1}\f$. + * * @param permutation_indices the array of indices containing the * permutation order. * @@ -538,6 +541,9 @@ class Permutable { * In the resulting LinOp, the entry at location `(perm[i],perm[j])` * contains the input value `(i,j)`. * + * From the linear algebra perspective, with \f$P_{ij} = \delta_{i + * \pi(i)}\f$, this represents the operation \f$P^{-1} A P\f$. + * * @param permutation_indices the array of indices containing the * permutation order. * @@ -555,6 +561,9 @@ class Permutable { * object. * In the resulting LinOp, the row `i` contains the input row `perm[i]`. * + * From the linear algebra perspective, with \f$P_{ij} = \delta_{i + * \pi(i)}\f$, this represents the operation \f$P A\f$. + * * @param permutation_indices the array of indices containing the * permutation order. * @@ -569,6 +578,9 @@ class Permutable { * In the resulting LinOp, the column `i` contains the input column * `perm[i]`. * + * From the linear algebra perspective, with \f$P_{ij} = \delta_{i + * \pi(i)}\f$, this represents the operation \f$A P^{-1}\f$. + * * @param permutation_indices the array of indices containing the * permutation order `perm`. * @@ -582,6 +594,9 @@ class Permutable { * object. * In the resulting LinOp, the row `perm[i]` contains the input row `i`. * + * From the linear algebra perspective, with \f$P_{ij} = \delta_{i + * \pi(i)}\f$, this represents the operation \f$P^{-1} A\f$. + * * @param permutation_indices the array of indices containing the * permutation order `perm`. * @@ -596,6 +611,9 @@ class Permutable { * In the resulting LinOp, the column `perm[i]` contains the input column * `i`. * + * From the linear algebra perspective, with \f$P_{ij} = \delta_{i + * \pi(i)}\f$, this represents the operation \f$A P\f$. + * * @param permutation_indices the array of indices containing the * permutation order `perm`. 
* From 03022434789ecdde574ef1e455899301729d13a2 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 3 Sep 2023 08:13:39 +0200 Subject: [PATCH 433/583] doc-and-new-interface --- include/ginkgo/core/base/lin_op.hpp | 28 ++-- include/ginkgo/core/matrix/csr.hpp | 84 ++++++++++++ include/ginkgo/core/matrix/dense.hpp | 191 +++++++++++++++++++++++++++ 3 files changed, 289 insertions(+), 14 deletions(-) diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index 531163e6c94..e40b0500bde 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -520,8 +520,8 @@ class Permutable { * In the resulting LinOp, the entry at location `(i,j)` contains the input * value `(perm[i],perm[j])`. * - * From the linear algebra perspective, with \f$P_{ij} = \delta_{i - * \pi(i)}\f$, this represents the operation \f$P A P^{-1}\f$. + * From the linear algebra perspective, with $P_{ij} = \delta_{i + * \pi(i)}$, this represents the operation $P A P^T$. * * @param permutation_indices the array of indices containing the * permutation order. @@ -533,7 +533,7 @@ class Permutable { { return as(this->row_permute(permutation_indices)) ->column_permute(permutation_indices); - }; + } /** * Returns a LinOp representing the symmetric inverse row and column @@ -541,8 +541,8 @@ class Permutable { * In the resulting LinOp, the entry at location `(perm[i],perm[j])` * contains the input value `(i,j)`. * - * From the linear algebra perspective, with \f$P_{ij} = \delta_{i - * \pi(i)}\f$, this represents the operation \f$P^{-1} A P\f$. + * From the linear algebra perspective, with $P_{ij} = \delta_{i + * \pi(i)}$, this represents the operation $P^{-1} A P^{-T}$. * * @param permutation_indices the array of indices containing the * permutation order. @@ -554,15 +554,15 @@ class Permutable { { return as(this->inverse_row_permute(permutation_indices)) ->inverse_column_permute(permutation_indices); - }; + } /** * Returns a LinOp representing the row permutation of the Permutable * object. * In the resulting LinOp, the row `i` contains the input row `perm[i]`. * - * From the linear algebra perspective, with \f$P_{ij} = \delta_{i - * \pi(i)}\f$, this represents the operation \f$P A\f$. + * From the linear algebra perspective, with $P_{ij} = \delta_{i + * \pi(i)}$, this represents the operation $P A$. * * @param permutation_indices the array of indices containing the * permutation order. @@ -578,8 +578,8 @@ class Permutable { * In the resulting LinOp, the column `i` contains the input column * `perm[i]`. * - * From the linear algebra perspective, with \f$P_{ij} = \delta_{i - * \pi(i)}\f$, this represents the operation \f$A P^{-1}\f$. + * From the linear algebra perspective, with $P_{ij} = \delta_{i + * \pi(i)}$, this represents the operation $A P^T$. * * @param permutation_indices the array of indices containing the * permutation order `perm`. @@ -594,8 +594,8 @@ class Permutable { * object. * In the resulting LinOp, the row `perm[i]` contains the input row `i`. * - * From the linear algebra perspective, with \f$P_{ij} = \delta_{i - * \pi(i)}\f$, this represents the operation \f$P^{-1} A\f$. + * From the linear algebra perspective, with $P_{ij} = \delta_{i + * \pi(i)}$, this represents the operation $P^{-1} A$. * * @param permutation_indices the array of indices containing the * permutation order `perm`. @@ -611,8 +611,8 @@ class Permutable { * In the resulting LinOp, the column `perm[i]` contains the input column * `i`. 
* - * From the linear algebra perspective, with \f$P_{ij} = \delta_{i - * \pi(i)}\f$, this represents the operation \f$A P\f$. + * From the linear algebra perspective, with $P_{ij} = \delta_{i + * \pi(i)}$, this represents the operation $A P^{-T}$. * * @param permutation_indices the array of indices containing the * permutation order `perm`. diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 611e5d33c64..834208c4322 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -59,6 +59,12 @@ class Ell; template class Hybrid; +template +class Permutation; + +template +class ScaledPermutation; + template class Sellp; @@ -763,6 +769,84 @@ class Csr : public EnableLinOp>, std::unique_ptr conj_transpose() const override; + /** + * Creates a permuted copy $A'$ of this matrix $A$ with the given + * permutation $P$. By default, this computes a symmetric permutation + * (permute_mode::symmetric). For the effect of the different permutation + * modes, see the following table. + * + * mode | entry mapping | matrix representation + * ------------------|----------------------------|---------------------- + * none | $A'(i, j) = A(i, j)$ | $A' = A$ + * rows | $A'(i, j) = A(p[i], j)$ | $A' = P A$ + * columns | $A'(i, j) = A(i, p[j])$ | $A' = A P^T$ + * inverse_rows | $A'(p[i], j) = A(i, j)$ | $A' = P^{-1} A$ + * inverse_columns | $A'(p[i], j) = A(i, j)$ | $A' = A P^{-T}$ + * symmetric | $A'(i, j) = A(p[i], p[j])$ | $A' = P A P^T$ + * inverse_symmetric | $A'(p[i], p[j]) = A(i, j)$ | $A' = P^{-1} A P^{-T}$ + * + * @param permutation The input permutation. + * @param mode The permutation mode. If permute_mode::inverse is set, we + * use the inverse permutation $P^{-1}$ instead of $P$. + * If permute_mode::rows is set, the rows will be permuted. + * If permute_mode::columns is set, the columns will be + * permuted. + * @return The permuted matrix. + */ + std::unique_ptr permute( + ptr_param> permutation, + permute_mode mode = permute_mode::symmetric) const; + + /** + * Creates a non-symmetrically permuted copy $A'$ of this matrix $A$ with + * the given row and column permutations $P$ and $Q$. The operation will + * compute $A'(i, j) = A(p[i], q[j])$, or $A' = P A Q^T$ if `invert` is + * `false`, and $A'(p[i], q[j]) = A(i,j)$, or $A' = P^{-1} A Q^{-T}$ if + * `invert` is `true`. + * + * @param row_permutation The permutation $P$ to apply to the rows + * @param column_permutation The permutation $Q$ to apply to the columns + * @param invert If set to `false`, uses the input permutations, otherwise + * uses their inverses $P^{-1}, Q^{-1}$ + * @return The permuted matrix. + */ + std::unique_ptr permute( + ptr_param> row_permutation, + ptr_param> column_permutation, + bool invert = false) const; + + /** + * Creates a scaled and permuted copy of this matrix. + * For an explanation of the permutation modes, see + * @ref permute(ptr_param>, permute_mode) + * + * @param permutation The scaled permutation. + * @param mode The permutation mode. + * @return The permuted matrix. + */ + std::unique_ptr scale_permute( + ptr_param> permutation, + permute_mode = permute_mode::symmetric) const; + + /** + * Creates a scaled and permuted copy of this matrix. + * For an explanation of the parameters, see + * @ref permute(ptr_param>, ptr_param>, permute_mode) + * + * @param row_permutation The scaled row permutation. + * @param column_permutation The scaled column permutation. 
+ * @param invert If set to `false`, uses the input permutations, otherwise + * uses their inverses $P^{-1}, Q^{-1}$ + * @return The permuted matrix. + */ + std::unique_ptr scale_permute( + ptr_param> + row_permutation, + ptr_param> + column_permutation, + bool invert = false) const; + std::unique_ptr permute( const array* permutation_indices) const override; diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 0db8f7697a5..9c4799951f2 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -81,6 +81,12 @@ class Fbcsr; template class Hybrid; +template +class Permutation; + +template +class ScaledPermutation; + template class Sellp; @@ -401,6 +407,191 @@ class Dense */ void fill(const ValueType value); + /** + * Creates a permuted copy $A'$ of this matrix $A$ with the given + * permutation $P$. By default, this computes a symmetric permutation + * (permute_mode::symmetric). For the effect of the different permutation + * modes, see the following table. + * + * mode | entry mapping | matrix representation + * ------------------|----------------------------|---------------------- + * none | $A'(i, j) = A(i, j)$ | $A' = A$ + * rows | $A'(i, j) = A(p[i], j)$ | $A' = P A$ + * columns | $A'(i, j) = A(i, p[j])$ | $A' = A P^T$ + * inverse_rows | $A'(p[i], j) = A(i, j)$ | $A' = P^{-1} A$ + * inverse_columns | $A'(p[i], j) = A(i, j)$ | $A' = A P^{-T}$ + * symmetric | $A'(i, j) = A(p[i], p[j])$ | $A' = P A P^T$ + * inverse_symmetric | $A'(p[i], p[j]) = A(i, j)$ | $A' = P^{-1} A P^{-T}$ + * + * @param permutation The input permutation. + * @param mode The permutation mode. If permute_mode::inverse is set, we + * use the inverse permutation $P^{-1}$ instead of $P$. + * If permute_mode::rows is set, the rows will be permuted. + * If permute_mode::columns is set, the columns will be + * permuted. + * @return The permuted matrix. + */ + std::unique_ptr permute( + ptr_param> permutation, + permute_mode mode = permute_mode::symmetric) const; + + /** + * @copydoc permute(ptr_param>, permute_mode) + */ + std::unique_ptr permute( + ptr_param> permutation, + permute_mode mode = permute_mode::symmetric) const; + + /** + * Overload of permute(ptr_param>, permute_mode) + * that writes the permuted copy into an existing Dense matrix. + * @param output the output matrix. + */ + void permute(ptr_param> permutation, + ptr_param output, permute_mode mode) const; + + /** + * @copydoc permute(ptr_param>, ptr_param, + * permute_mode) + */ + void permute(ptr_param> permutation, + ptr_param output, permute_mode mode) const; + + /** + * Creates a non-symmetrically permuted copy $A'$ of this matrix $A$ with + * the given row and column permutations $P$ and $Q$. The operation will + * compute $A'(i, j) = A(p[i], q[j])$, or $A' = P A Q^T$ if `invert` is + * `false`, and $A'(p[i], q[j]) = A(i,j)$, or $A' = P^{-1} A Q^{-T}$ if + * `invert` is `true`. + * + * @param row_permutation The permutation $P$ to apply to the rows + * @param column_permutation The permutation $Q$ to apply to the columns + * @param invert If set to `false`, uses the input permutations, otherwise + * uses their inverses $P^{-1}, Q^{-1}$ + * @return The permuted matrix. 
+ */ + std::unique_ptr permute( + ptr_param> row_permutation, + ptr_param> column_permutation, + bool invert = false) const; + + /** + * @copydoc permute(ptr_param>, ptr_param>, permute_mode) + */ + std::unique_ptr permute( + ptr_param> row_permutation, + ptr_param> column_permutation, + bool invert = false) const; + + /** + * Overload of permute(ptr_param>, ptr_param>, permute_mode) that writes the permuted copy into an + * existing Dense matrix. + * @param output the output matrix. + */ + void permute(ptr_param> row_permutation, + ptr_param> column_permutation, + ptr_param output, bool invert) const; + + /** + * @copydoc permute(ptr_param>, ptr_param>, ptr_param, permute_mode) + */ + void permute(ptr_param> row_permutation, + ptr_param> column_permutation, + ptr_param output, bool invert) const; + + /** + * Creates a scaled and permuted copy of this matrix. + * For an explanation of the permutation modes, see + * @ref permute(ptr_param>, permute_mode) + * + * @param permutation The scaled permutation. + * @param mode The permutation mode. + * @return The permuted matrix. + */ + std::unique_ptr scale_permute( + ptr_param> permutation, + permute_mode mode = permute_mode::symmetric) const; + + /** + * @copydoc scale_permute(ptr_param>, permute_mode) + */ + std::unique_ptr scale_permute( + ptr_param> permutation, + permute_mode mode = permute_mode::symmetric) const; + + /** + * Overload of scale_permute(ptr_param>, permute_mode) that writes the permuted copy into an + * existing Dense matrix. + * @param output the output matrix. + */ + void scale_permute( + ptr_param> permutation, + ptr_param output, permute_mode mode) const; + + /** + * @copydoc scale_permute(ptr_param>, ptr_param, permute_mode) + */ + std::unique_ptr scale_permute( + ptr_param> permutation, + ptr_param output, permute_mode mode) const; + + /** + * Creates a scaled and permuted copy of this matrix. + * For an explanation of the parameters, see + * @ref permute(ptr_param>, ptr_param>, permute_mode) + * + * @param row_permutation The scaled row permutation. + * @param column_permutation The scaled column permutation. + * @param invert If set to `false`, uses the input permutations, otherwise + * uses their inverses $P^{-1}, Q^{-1}$ + * @return The permuted matrix. + */ + std::unique_ptr scale_permute( + ptr_param> row_permutation, + ptr_param> + column_permutation, + bool invert = false) const; + + /** + * @copydoc scale_permute(ptr_param>, ptr_param>, bool) + */ + std::unique_ptr scale_permute( + ptr_param> row_permutation, + ptr_param> + column_permutation, + bool invert = false) const; + + /** + * Overload of scale_permute(ptr_param>, ptr_param>, bool) + * that writes the permuted copy into an existing Dense matrix. + * @param output the output matrix. 
+ */ + std::unique_ptr scale_permute( + ptr_param> row_permutation, + ptr_param> + column_permutation, + ptr_param output, bool invert) const; + + /** + * @copydoc scale_permute(ptr_param>, ptr_param>, + * ptr_param, bool) + */ + std::unique_ptr scale_permute( + ptr_param> row_permutation, + ptr_param> + column_permutation, + ptr_param output, bool invert) const; + std::unique_ptr permute( const array* permutation_indices) const override; From f669e50df1814a9249543d63f719c6f48b136377 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 6 Oct 2023 20:54:13 +0200 Subject: [PATCH 434/583] add new interface and kernels --- common/cuda_hip/matrix/csr_kernels.hpp.inc | 631 +++++++++++++++++ common/unified/CMakeLists.txt | 2 + common/unified/matrix/csr_kernels.cpp | 56 +- .../matrix/dense_kernels.instantiate.cpp | 24 +- .../unified/matrix/dense_kernels.template.cpp | 225 +++++- common/unified/matrix/permutation_kernels.cpp | 67 ++ .../matrix/scaled_permutation_kernels.cpp | 72 ++ core/CMakeLists.txt | 3 +- core/device_hooks/common_kernels.inc.cpp | 46 +- core/matrix/csr.cpp | 317 ++++++--- core/matrix/csr_kernels.hpp | 216 +++--- core/matrix/dense.cpp | 439 +++++++++--- core/matrix/dense_kernels.hpp | 336 +++++---- core/matrix/permutation.cpp | 38 ++ core/matrix/permutation_kernels.hpp | 82 +++ core/matrix/scaled_permutation.cpp | 142 ++++ core/matrix/scaled_permutation_kernels.hpp | 68 ++ core/test/matrix/permutation.cpp | 99 +-- cuda/matrix/csr_kernels.instantiate.cu | 12 +- cuda/matrix/csr_kernels.template.cu | 365 +--------- dpcpp/matrix/csr_kernels.dp.cpp | 85 ++- .../distributed-solver/distributed-solver.cpp | 5 +- hip/matrix/csr_kernels.instantiate.hip.cpp | 12 +- hip/matrix/csr_kernels.template.hip.cpp | 336 --------- include/ginkgo/core/base/exception.hpp | 20 + include/ginkgo/core/matrix/csr.hpp | 4 +- include/ginkgo/core/matrix/dense.hpp | 45 +- include/ginkgo/core/matrix/permutation.hpp | 94 ++- .../ginkgo/core/matrix/scaled_permutation.hpp | 177 +++++ include/ginkgo/ginkgo.hpp | 1 + omp/matrix/csr_kernels.cpp | 171 ++++- omp/test/reorder/rcm_kernels.cpp | 7 +- reference/CMakeLists.txt | 2 + reference/matrix/csr_kernels.cpp | 219 +++++- reference/matrix/dense_kernels.cpp | 230 ++++++- reference/matrix/permutation_kernels.cpp | 58 ++ .../matrix/scaled_permutation_kernels.cpp | 64 ++ reference/test/matrix/CMakeLists.txt | 1 + reference/test/matrix/csr_kernels.cpp | 461 ++++++++++++- reference/test/matrix/dense_kernels.cpp | 641 ++++++++++++++++++ reference/test/matrix/permutation.cpp | 415 +----------- reference/test/matrix/scaled_permutation.cpp | 116 ++++ reference/test/reorder/rcm_kernels.cpp | 2 +- test/matrix/CMakeLists.txt | 2 + test/matrix/csr_kernels2.cpp | 227 ++++++- test/matrix/dense_kernels.cpp | 216 ++++++ test/matrix/permutation_kernels.cpp | 73 ++ test/matrix/scaled_permutation_kernels.cpp | 77 +++ 48 files changed, 5305 insertions(+), 1696 deletions(-) create mode 100644 common/unified/matrix/permutation_kernels.cpp create mode 100644 common/unified/matrix/scaled_permutation_kernels.cpp create mode 100644 core/matrix/permutation_kernels.hpp create mode 100644 core/matrix/scaled_permutation.cpp create mode 100644 core/matrix/scaled_permutation_kernels.hpp create mode 100644 include/ginkgo/core/matrix/scaled_permutation.hpp create mode 100644 reference/matrix/permutation_kernels.cpp create mode 100644 reference/matrix/scaled_permutation_kernels.cpp create mode 100644 reference/test/matrix/scaled_permutation.cpp create mode 100644 test/matrix/permutation_kernels.cpp 
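The entry mappings from the permute_mode table documented above can be made concrete with a small standalone sketch. It uses plain C++ on a row-major n-by-n array rather than the Ginkgo classes, so the only assumption is that `perm` holds a valid permutation of 0..n-1: the `rows` mode gathers rows through the permutation (A' = P A), while `inverse_rows` scatters them (A' = P^{-1} A).

    #include <cstddef>
    #include <vector>

    // permute_mode::rows:  out(i, j) = in(perm[i], j)  <=>  out = P * in
    std::vector<double> permute_rows(const std::vector<double>& in,
                                     const std::vector<int>& perm, std::size_t n)
    {
        std::vector<double> out(n * n);
        for (std::size_t i = 0; i < n; ++i) {
            for (std::size_t j = 0; j < n; ++j) {
                out[i * n + j] = in[perm[i] * n + j];
            }
        }
        return out;
    }

    // permute_mode::inverse_rows:  out(perm[i], j) = in(i, j)  <=>  out = P^{-1} * in
    std::vector<double> inverse_permute_rows(const std::vector<double>& in,
                                             const std::vector<int>& perm,
                                             std::size_t n)
    {
        std::vector<double> out(n * n);
        for (std::size_t i = 0; i < n; ++i) {
            for (std::size_t j = 0; j < n; ++j) {
                out[perm[i] * n + j] = in[i * n + j];
            }
        }
        return out;
    }

The `columns` and `inverse_columns` modes apply the same gather/scatter to the second index, and the symmetric modes combine the row and column variants.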
create mode 100644 test/matrix/scaled_permutation_kernels.cpp diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 4bc601c5067..3a762ad5ad1 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -764,6 +764,147 @@ __global__ __launch_bounds__(default_block_size) void inv_symm_permute( } +template +__global__ __launch_bounds__(default_block_size) void inv_nonsymm_permute( + size_type num_rows, const IndexType* __restrict__ row_permutation, + const IndexType* __restrict__ col_permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = tid; + auto out_row = row_permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + out_cols[out_begin + i] = col_permutation[in_cols[in_begin + i]]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void row_scale_permute( + size_type num_rows, const ValueType* __restrict__ scale, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = permutation[tid]; + auto out_row = tid; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + out_cols[out_begin + i] = in_cols[in_begin + i]; + out_vals[out_begin + i] = in_vals[in_begin + i] * scale[out_row]; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void inv_row_scale_permute( + size_type num_rows, const ValueType* __restrict__ scale, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = tid; + auto out_row = permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + out_cols[out_begin + i] = in_cols[in_begin + i]; + out_vals[out_begin + i] = in_vals[in_begin + i] / scale[in_row]; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void inv_symm_scale_permute( + size_type num_rows, const ValueType* __restrict__ scale, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ 
in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = tid; + auto out_row = permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + const auto in_col = in_cols[in_begin + i]; + out_cols[out_begin + i] = permutation[in_col]; + out_vals[out_begin + i] = + in_vals[in_begin + i] / (scale[in_row] * scale[in_col]); + } +} + + +template +__global__ __launch_bounds__(default_block_size) void inv_nonsymm_scale_permute( + size_type num_rows, const ValueType* __restrict__ row_scale, + const IndexType* __restrict__ row_permutation, + const ValueType* __restrict__ col_scale, + const IndexType* __restrict__ col_permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = tid; + auto out_row = row_permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + const auto in_col = in_cols[in_begin + i]; + out_cols[out_begin + i] = col_permutation[in_col]; + out_vals[out_begin + i] = + in_vals[in_begin + i] / (row_scale[in_row] * col_scale[in_col]); + } +} + + template __global__ __launch_bounds__(default_block_size) void compute_submatrix_idxs_and_vals( @@ -1120,6 +1261,408 @@ void build_lookup(std::shared_ptr exec, } +namespace { + + +template +void spgeam(syn::value_list, + std::shared_ptr exec, const ValueType* alpha, + const IndexType* a_row_ptrs, const IndexType* a_col_idxs, + const ValueType* a_vals, const ValueType* beta, + const IndexType* b_row_ptrs, const IndexType* b_col_idxs, + const ValueType* b_vals, matrix::Csr* c) +{ + auto m = static_cast(c->get_size()[0]); + auto c_row_ptrs = c->get_row_ptrs(); + // count nnz for alpha * A + beta * B + auto subwarps_per_block = default_block_size / subwarp_size; + auto num_blocks = ceildiv(m, subwarps_per_block); + if (num_blocks > 0) { + kernel::spgeam_nnz + <<get_stream()>>>( + a_row_ptrs, a_col_idxs, b_row_ptrs, b_col_idxs, m, c_row_ptrs); + } + + // build row pointers + components::prefix_sum_nonnegative(exec, c_row_ptrs, m + 1); + + // accumulate non-zeros for alpha * A + beta * B + matrix::CsrBuilder c_builder{c}; + auto c_nnz = exec->copy_val_to_host(c_row_ptrs + m); + c_builder.get_col_idx_array().resize_and_reset(c_nnz); + c_builder.get_value_array().resize_and_reset(c_nnz); + auto c_col_idxs = c->get_col_idxs(); + auto c_vals = c->get_values(); + if (num_blocks > 0) { + kernel::spgeam + <<get_stream()>>>( + as_device_type(alpha), a_row_ptrs, a_col_idxs, + as_device_type(a_vals), as_device_type(beta), b_row_ptrs, + b_col_idxs, as_device_type(b_vals), m, c_row_ptrs, c_col_idxs, + as_device_type(c_vals)); + } +} + +GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam); + + +} // namespace + + +template +void spgeam(std::shared_ptr exec, + const matrix::Dense* alpha, + const matrix::Csr* a, + 
const matrix::Dense* beta, + const matrix::Csr* b, + matrix::Csr* c) +{ + auto total_nnz = + a->get_num_stored_elements() + b->get_num_stored_elements(); + auto nnz_per_row = total_nnz / a->get_size()[0]; + select_spgeam( + spgeam_kernels(), + [&](int compiled_subwarp_size) { + return compiled_subwarp_size >= nnz_per_row || + compiled_subwarp_size == config::warp_size; + }, + syn::value_list(), syn::type_list<>(), exec, + alpha->get_const_values(), a->get_const_row_ptrs(), + a->get_const_col_idxs(), a->get_const_values(), + beta->get_const_values(), b->get_const_row_ptrs(), + b->get_const_col_idxs(), b->get_const_values(), c); +} + + +template +void fill_in_dense(std::shared_ptr exec, + const matrix::Csr* source, + matrix::Dense* result) +{ + const auto num_rows = result->get_size()[0]; + const auto num_cols = result->get_size()[1]; + const auto stride = result->get_stride(); + const auto row_ptrs = source->get_const_row_ptrs(); + const auto col_idxs = source->get_const_col_idxs(); + const auto vals = source->get_const_values(); + + auto grid_dim = ceildiv(num_rows, default_block_size); + if (grid_dim > 0) { + kernel::fill_in_dense<<get_stream()>>>( + num_rows, as_device_type(row_ptrs), as_device_type(col_idxs), + as_device_type(vals), stride, as_device_type(result->get_values())); + } +} + + +template +void inv_symm_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + if (count_num_blocks > 0) { + kernel::inv_row_ptr_permute<<get_stream()>>>( + num_rows, perm, orig->get_const_row_ptrs(), + permuted->get_row_ptrs()); + } + components::prefix_sum_nonnegative(exec, permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + if (copy_num_blocks > 0) { + kernel::inv_symm_permute + <<get_stream()>>>( + num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), + as_device_type(orig->get_const_values()), + permuted->get_row_ptrs(), permuted->get_col_idxs(), + as_device_type(permuted->get_values())); + } +} + + +template +void inv_nonsymm_permute(std::shared_ptr exec, + const IndexType* row_perm, const IndexType* col_perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + if (count_num_blocks > 0) { + kernel::inv_row_ptr_permute<<get_stream()>>>( + num_rows, row_perm, orig->get_const_row_ptrs(), + permuted->get_row_ptrs()); + } + components::prefix_sum_nonnegative(exec, permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + if (copy_num_blocks > 0) { + kernel::inv_nonsymm_permute + <<get_stream()>>>( + num_rows, row_perm, col_perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), + as_device_type(orig->get_const_values()), + permuted->get_row_ptrs(), permuted->get_col_idxs(), + as_device_type(permuted->get_values())); + } +} + + +template +void row_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + if (count_num_blocks > 0) { + kernel::row_ptr_permute<<get_stream()>>>( + num_rows, perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + } + components::prefix_sum_nonnegative(exec, 
row_permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + if (copy_num_blocks > 0) { + kernel::row_permute + <<get_stream()>>>( + num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), + as_device_type(orig->get_const_values()), + row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), + as_device_type(row_permuted->get_values())); + } +} + + +template +void inv_row_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + if (count_num_blocks > 0) { + kernel::inv_row_ptr_permute<<get_stream()>>>( + num_rows, perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + } + components::prefix_sum_nonnegative(exec, row_permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + if (copy_num_blocks > 0) { + kernel::inv_row_permute + <<get_stream()>>>( + num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), + as_device_type(orig->get_const_values()), + row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), + as_device_type(row_permuted->get_values())); + } +} + + +template +void inv_symm_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + if (count_num_blocks > 0) { + kernel::inv_row_ptr_permute<<get_stream()>>>( + num_rows, perm, orig->get_const_row_ptrs(), + permuted->get_row_ptrs()); + } + components::prefix_sum_nonnegative(exec, permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + if (copy_num_blocks > 0) { + kernel::inv_symm_scale_permute + <<get_stream()>>>( + num_rows, as_device_type(scale), perm, + orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + as_device_type(orig->get_const_values()), + permuted->get_row_ptrs(), permuted->get_col_idxs(), + as_device_type(permuted->get_values())); + } +} + + +template +void inv_nonsymm_scale_permute(std::shared_ptr exec, + const ValueType* row_scale, + const IndexType* row_perm, + const ValueType* col_scale, + const IndexType* col_perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + if (count_num_blocks > 0) { + kernel::inv_row_ptr_permute<<get_stream()>>>( + num_rows, row_perm, orig->get_const_row_ptrs(), + permuted->get_row_ptrs()); + } + components::prefix_sum_nonnegative(exec, permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + if (copy_num_blocks > 0) { + kernel::inv_nonsymm_scale_permute + <<get_stream()>>>( + num_rows, as_device_type(row_scale), row_perm, + as_device_type(col_scale), col_perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), + as_device_type(orig->get_const_values()), + permuted->get_row_ptrs(), permuted->get_col_idxs(), + as_device_type(permuted->get_values())); + } +} + + +template +void row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto 
count_num_blocks = ceildiv(num_rows, default_block_size); + if (count_num_blocks > 0) { + kernel::row_ptr_permute<<get_stream()>>>( + num_rows, perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + } + components::prefix_sum_nonnegative(exec, row_permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + if (copy_num_blocks > 0) { + kernel::row_scale_permute + <<get_stream()>>>( + num_rows, as_device_type(scale), perm, + orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + as_device_type(orig->get_const_values()), + row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), + as_device_type(row_permuted->get_values())); + } +} + + +template +void inv_row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + if (count_num_blocks > 0) { + kernel::inv_row_ptr_permute<<get_stream()>>>( + num_rows, perm, orig->get_const_row_ptrs(), + row_permuted->get_row_ptrs()); + } + components::prefix_sum_nonnegative(exec, row_permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + if (copy_num_blocks > 0) { + kernel::inv_row_scale_permute + <<get_stream()>>>( + num_rows, as_device_type(scale), perm, + orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + as_device_type(orig->get_const_values()), + row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), + as_device_type(row_permuted->get_values())); + } +} + + +template +void calculate_nonzeros_per_row_in_span( + std::shared_ptr exec, + const matrix::Csr* source, const span& row_span, + const span& col_span, array* row_nnz) +{ + const auto num_rows = source->get_size()[0]; + auto row_ptrs = source->get_const_row_ptrs(); + auto col_idxs = source->get_const_col_idxs(); + auto grid_dim = ceildiv(row_span.length(), default_block_size); + if (grid_dim > 0) { + kernel::calculate_nnz_per_row_in_span<<get_stream()>>>( + row_span, col_span, as_device_type(row_ptrs), + as_device_type(col_idxs), as_device_type(row_nnz->get_data())); + } +} + + +template +void compute_submatrix(std::shared_ptr exec, + const matrix::Csr* source, + gko::span row_span, gko::span col_span, + matrix::Csr* result) +{ + auto row_offset = row_span.begin; + auto col_offset = col_span.begin; + auto num_rows = result->get_size()[0]; + auto num_cols = result->get_size()[1]; + auto row_ptrs = source->get_const_row_ptrs(); + auto grid_dim = ceildiv(num_rows, default_block_size); + if (grid_dim > 0) { + kernel::compute_submatrix_idxs_and_vals<<get_stream()>>>( + num_rows, num_cols, row_offset, col_offset, + as_device_type(source->get_const_row_ptrs()), + as_device_type(source->get_const_col_idxs()), + as_device_type(source->get_const_values()), + as_device_type(result->get_const_row_ptrs()), + as_device_type(result->get_col_idxs()), + as_device_type(result->get_values())); + } +} + + +template +void calculate_nonzeros_per_row_in_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, + IndexType* row_nnz) GKO_NOT_IMPLEMENTED; + + +template +void compute_submatrix_from_index_set( + std::shared_ptr exec, + const matrix::Csr* source, + const gko::index_set& row_index_set, + const gko::index_set& col_index_set, + matrix::Csr* result) GKO_NOT_IMPLEMENTED; 
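The host wrappers above all follow the same two-phase pattern: a first kernel scatters each input row's length to its permuted position, a prefix sum turns the lengths into the output row pointers, and a second subwarp-per-row kernel copies column indices and values into the gathered rows. The following sequential reference is a minimal sketch of the symmetric inverse permutation (B[perm[i]][perm[j]] = A[i][j]); the struct and function names are illustrative and not part of the library API.

#include <cstdint>
#include <numeric>
#include <vector>

struct csr_ref {
    std::vector<std::int64_t> row_ptrs;
    std::vector<std::int64_t> cols;
    std::vector<double> vals;
};

csr_ref inv_symm_permute_ref(const csr_ref& in,
                             const std::vector<std::int64_t>& perm)
{
    const auto n = static_cast<std::int64_t>(perm.size());
    csr_ref out;
    out.row_ptrs.assign(n + 1, 0);
    // phase 1: the length of input row i becomes the length of output row perm[i]
    for (std::int64_t i = 0; i < n; ++i) {
        out.row_ptrs[perm[i] + 1] = in.row_ptrs[i + 1] - in.row_ptrs[i];
    }
    std::partial_sum(out.row_ptrs.begin(), out.row_ptrs.end(),
                     out.row_ptrs.begin());
    out.cols.resize(in.cols.size());
    out.vals.resize(in.vals.size());
    // phase 2: copy each row into its permuted position, relabeling columns
    for (std::int64_t i = 0; i < n; ++i) {
        auto out_pos = out.row_ptrs[perm[i]];
        for (auto k = in.row_ptrs[i]; k < in.row_ptrs[i + 1]; ++k) {
            out.cols[out_pos] = perm[in.cols[k]];
            out.vals[out_pos] = in.vals[k];
            ++out_pos;
        }
    }
    return out;
}

Note that the relabeled column indices are generally no longer sorted within each row, which is why the Csr::permute implementations further down call sort_by_column_index whenever columns are permuted.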
+ + template void fallback_transpose(std::shared_ptr exec, const matrix::Csr* input, @@ -1169,3 +1712,91 @@ void fallback_sort(std::shared_ptr exec, thrust::stable_sort_by_key(thrust_policy(exec), row_idxs, row_idxs + nnz, col_val_it); } + + +template +void is_sorted_by_column_index( + std::shared_ptr exec, + const matrix::Csr* to_check, bool* is_sorted) +{ + *is_sorted = true; + auto cpu_array = make_array_view(exec->get_master(), 1, is_sorted); + auto gpu_array = array{exec, cpu_array}; + auto block_size = default_block_size; + auto num_rows = static_cast(to_check->get_size()[0]); + auto num_blocks = ceildiv(num_rows, block_size); + if (num_blocks > 0) { + kernel:: + check_unsorted<<get_stream()>>>( + to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(), + num_rows, gpu_array.get_data()); + } + cpu_array = gpu_array; +} + + +template +void extract_diagonal(std::shared_ptr exec, + const matrix::Csr* orig, + matrix::Diagonal* diag) +{ + const auto nnz = orig->get_num_stored_elements(); + const auto diag_size = diag->get_size()[0]; + const auto num_blocks = + ceildiv(config::warp_size * diag_size, default_block_size); + + const auto orig_values = orig->get_const_values(); + const auto orig_row_ptrs = orig->get_const_row_ptrs(); + const auto orig_col_idxs = orig->get_const_col_idxs(); + auto diag_values = diag->get_values(); + if (num_blocks > 0) { + kernel::extract_diagonal<<get_stream()>>>( + diag_size, nnz, as_device_type(orig_values), + as_device_type(orig_row_ptrs), as_device_type(orig_col_idxs), + as_device_type(diag_values)); + } +} + + +template +void check_diagonal_entries_exist( + std::shared_ptr exec, + const matrix::Csr* const mtx, bool& has_all_diags) +{ + const auto num_diag = static_cast( + std::min(mtx->get_size()[0], mtx->get_size()[1])); + if (num_diag > 0) { + const IndexType num_blocks = + ceildiv(num_diag, default_block_size / config::warp_size); + array has_diags(exec, {true}); + kernel::check_diagonal_entries<<get_stream()>>>( + num_diag, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), + has_diags.get_data()); + has_all_diags = exec->copy_val_to_host(has_diags.get_const_data()); + } else { + has_all_diags = true; + } +} + + +template +void add_scaled_identity(std::shared_ptr exec, + const matrix::Dense* const alpha, + const matrix::Dense* const beta, + matrix::Csr* const mtx) +{ + const auto nrows = mtx->get_size()[0]; + if (nrows == 0) { + return; + } + const auto nthreads = nrows * config::warp_size; + const auto nblocks = ceildiv(nthreads, default_block_size); + kernel::add_scaled_identity<<get_stream()>>>( + as_device_type(alpha->get_const_values()), + as_device_type(beta->get_const_values()), static_cast(nrows), + mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), + as_device_type(mtx->get_values())); +} diff --git a/common/unified/CMakeLists.txt b/common/unified/CMakeLists.txt index 67fc839d6a7..7ac6b3df40c 100644 --- a/common/unified/CMakeLists.txt +++ b/common/unified/CMakeLists.txt @@ -12,6 +12,8 @@ set(UNIFIED_SOURCES matrix/csr_kernels.cpp matrix/ell_kernels.cpp matrix/hybrid_kernels.cpp + matrix/permutation_kernels.cpp + matrix/scaled_permutation_kernels.cpp matrix/sellp_kernels.cpp matrix/sparsity_csr_kernels.cpp matrix/diagonal_kernels.cpp diff --git a/common/unified/matrix/csr_kernels.cpp b/common/unified/matrix/csr_kernels.cpp index 1704fdd1f9c..4746f88ddfe 100644 --- a/common/unified/matrix/csr_kernels.cpp +++ b/common/unified/matrix/csr_kernels.cpp @@ -54,53 +54,71 @@ namespace GKO_DEVICE_NAMESPACE { namespace csr { -template -void 
invert_permutation(std::shared_ptr exec, - size_type size, const IndexType* permutation_indices, - IndexType* inv_permutation) +template +void inv_col_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* col_permuted) { + auto num_rows = orig->get_size()[0]; + auto nnz = orig->get_num_stored_elements(); + auto size = std::max(num_rows, nnz); run_kernel( exec, - [] GKO_KERNEL(auto tid, auto permutation, auto inv_permutation) { - inv_permutation[permutation[tid]] = tid; + [] GKO_KERNEL(auto tid, auto num_rows, auto num_nonzeros, + auto permutation, auto in_row_ptrs, auto in_col_idxs, + auto in_vals, auto out_row_ptrs, auto out_col_idxs, + auto out_vals) { + if (tid < num_nonzeros) { + out_col_idxs[tid] = permutation[in_col_idxs[tid]]; + out_vals[tid] = in_vals[tid]; + } + if (tid <= num_rows) { + out_row_ptrs[tid] = in_row_ptrs[tid]; + } }, - size, permutation_indices, inv_permutation); + size, num_rows, nnz, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), orig->get_const_values(), + col_permuted->get_row_ptrs(), col_permuted->get_col_idxs(), + col_permuted->get_values()); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_COL_PERMUTE_KERNEL); template -void inverse_column_permute(std::shared_ptr exec, - const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* column_permuted) +void inv_col_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* col_permuted) { auto num_rows = orig->get_size()[0]; auto nnz = orig->get_num_stored_elements(); auto size = std::max(num_rows, nnz); run_kernel( exec, - [] GKO_KERNEL(auto tid, auto num_rows, auto num_nonzeros, + [] GKO_KERNEL(auto tid, auto num_rows, auto num_nonzeros, auto scale, auto permutation, auto in_row_ptrs, auto in_col_idxs, auto in_vals, auto out_row_ptrs, auto out_col_idxs, auto out_vals) { if (tid < num_nonzeros) { - out_col_idxs[tid] = permutation[in_col_idxs[tid]]; - out_vals[tid] = in_vals[tid]; + const auto in_col = in_col_idxs[tid]; + out_col_idxs[tid] = permutation[in_col]; + out_vals[tid] = in_vals[tid] / scale[in_col]; } if (tid <= num_rows) { out_row_ptrs[tid] = in_row_ptrs[tid]; } }, - size, num_rows, nnz, perm, orig->get_const_row_ptrs(), + size, num_rows, nnz, scale, perm, orig->get_const_row_ptrs(), orig->get_const_col_idxs(), orig->get_const_values(), - column_permuted->get_row_ptrs(), column_permuted->get_col_idxs(), - column_permuted->get_values()); + col_permuted->get_row_ptrs(), col_permuted->get_col_idxs(), + col_permuted->get_values()); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_COL_SCALE_PERMUTE_KERNEL); template diff --git a/common/unified/matrix/dense_kernels.instantiate.cpp b/common/unified/matrix/dense_kernels.instantiate.cpp index f34d05954c4..73e06385f54 100644 --- a/common/unified/matrix/dense_kernels.instantiate.cpp +++ b/common/unified/matrix/dense_kernels.instantiate.cpp @@ -59,16 +59,36 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_NONSYMM_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_NONSYMM_PERMUTE_KERNEL); 
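A pure column permutation leaves the row structure untouched, so the unified kernels above avoid the two-phase scheme entirely: they launch one flat index range that covers both the nonzeros and the num_rows + 1 row pointers, copy the row pointers verbatim, relabel every column index through the permutation, and, in the scaled variant, divide each value by the scale factor of its original column. A sequential sketch with illustrative names and assumed double/int64 types:

#include <cstddef>
#include <cstdint>

void inv_col_scale_permute_ref(std::size_t num_rows, std::size_t nnz,
                               const double* scale, const std::int64_t* perm,
                               const std::int64_t* in_row_ptrs,
                               const std::int64_t* in_cols,
                               const double* in_vals,
                               std::int64_t* out_row_ptrs,
                               std::int64_t* out_cols, double* out_vals)
{
    // row pointers are unchanged by a column permutation
    for (std::size_t i = 0; i <= num_rows; ++i) {
        out_row_ptrs[i] = in_row_ptrs[i];
    }
    // relabel columns and undo the scaling attached to the original column
    for (std::size_t k = 0; k < nnz; ++k) {
        const auto in_col = in_cols[k];
        out_cols[k] = perm[in_col];
        out_vals[k] = in_vals[k] / scale[in_col];
    }
}

As in the row-permuting kernels, the relabeled column indices end up unsorted, so the caller re-sorts the result afterwards.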
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ROW_GATHER_KERNEL); GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_DENSE_INV_COL_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_NONSYMM_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_NONSYMM_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_ROW_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_ROW_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_COL_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_COL_SCALE_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL); diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index 9bd5c04f861..f3723ae8aad 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -383,7 +383,7 @@ void compute_sqrt(std::shared_ptr exec, template void symm_permute(std::shared_ptr exec, - const array* permutation_indices, + const IndexType* permutation_indices, const matrix::Dense* orig, matrix::Dense* permuted) { @@ -392,13 +392,13 @@ void symm_permute(std::shared_ptr exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { permuted(row, col) = orig(perm[row], perm[col]); }, - orig->get_size(), orig, *permutation_indices, permuted); + orig->get_size(), orig, permutation_indices, permuted); } template void inv_symm_permute(std::shared_ptr exec, - const array* permutation_indices, + const IndexType* permutation_indices, const matrix::Dense* orig, matrix::Dense* permuted) { @@ -407,14 +407,49 @@ void inv_symm_permute(std::shared_ptr exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { permuted(perm[row], perm[col]) = orig(row, col); }, - orig->get_size(), orig, *permutation_indices, permuted); + orig->get_size(), orig, permutation_indices, permuted); +} + + +template +void nonsymm_permute(std::shared_ptr exec, + const IndexType* row_permutation_indices, + const IndexType* column_permutation_indices, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto orig, auto row_perm, + auto col_perm, auto permuted) { + permuted(row, col) = orig(row_perm[row], col_perm[col]); + }, + orig->get_size(), orig, row_permutation_indices, + column_permutation_indices, permuted); +} + + +template +void inv_nonsymm_permute(std::shared_ptr exec, + const IndexType* row_permutation_indices, + const IndexType* column_permutation_indices, + const matrix::Dense* orig, + 
matrix::Dense* permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto orig, auto row_perm, + auto col_perm, auto permuted) { + permuted(row_perm[row], col_perm[col]) = orig(row, col); + }, + orig->get_size(), orig, row_permutation_indices, + column_permutation_indices, permuted); } template void row_gather(std::shared_ptr exec, - const array* row_idxs, - const matrix::Dense* orig, + const IndexType* row_idxs, const matrix::Dense* orig, matrix::Dense* row_collection) { run_kernel( @@ -422,15 +457,14 @@ void row_gather(std::shared_ptr exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto rows, auto gathered) { gathered(row, col) = orig(rows[row], col); }, - dim<2>{row_idxs->get_num_elems(), orig->get_size()[1]}, orig, *row_idxs, - row_collection); + row_collection->get_size(), orig, row_idxs, row_collection); } template void advanced_row_gather(std::shared_ptr exec, const matrix::Dense* alpha, - const array* row_idxs, + const IndexType* row_idxs, const matrix::Dense* orig, const matrix::Dense* beta, matrix::Dense* row_collection) @@ -445,54 +479,191 @@ void advanced_row_gather(std::shared_ptr exec, static_cast(beta[0]) * static_cast(gathered(row, col)); }, - dim<2>{row_idxs->get_num_elems(), orig->get_size()[1]}, - alpha->get_const_values(), orig, *row_idxs, beta->get_const_values(), - row_collection); + row_collection->get_size(), alpha->get_const_values(), orig, row_idxs, + beta->get_const_values(), row_collection); } template -void column_permute(std::shared_ptr exec, - const array* permutation_indices, - const matrix::Dense* orig, - matrix::Dense* column_permuted) +void col_permute(std::shared_ptr exec, + const IndexType* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* col_permuted) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { permuted(row, col) = orig(row, perm[col]); }, - orig->get_size(), orig, *permutation_indices, column_permuted); + orig->get_size(), orig, permutation_indices, col_permuted); } template -void inverse_row_permute(std::shared_ptr exec, - const array* permutation_indices, - const matrix::Dense* orig, - matrix::Dense* row_permuted) +void inv_row_permute(std::shared_ptr exec, + const IndexType* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* row_permuted) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { permuted(perm[row], col) = orig(row, col); }, - orig->get_size(), orig, *permutation_indices, row_permuted); + orig->get_size(), orig, permutation_indices, row_permuted); } template -void inverse_column_permute(std::shared_ptr exec, - const array* permutation_indices, - const matrix::Dense* orig, - matrix::Dense* column_permuted) +void inv_col_permute(std::shared_ptr exec, + const IndexType* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* col_permuted) { run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto orig, auto perm, auto permuted) { permuted(row, perm[col]) = orig(row, col); }, - orig->get_size(), orig, *permutation_indices, column_permuted); + orig->get_size(), orig, permutation_indices, col_permuted); +} + + +template +void symm_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, + auto permuted) { + permuted(i, j) = scale[i] * scale[j] * orig(perm[i], perm[j]); + }, + orig->get_size(), scale, perm, orig, 
permuted); +} + + +template +void inv_symm_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, + auto permuted) { + permuted(perm[i], perm[j]) = orig(i, j) / (scale[i] * scale[j]); + }, + orig->get_size(), scale, perm, orig, permuted); +} + + +template +void nonsymm_scale_permute(std::shared_ptr exec, + const ValueType* row_scale, + const IndexType* row_perm, + const ValueType* col_scale, + const IndexType* col_perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto row_scale, auto row_perm, + auto col_scale, auto col_perm, auto orig, auto permuted) { + permuted(i, j) = + row_scale[i] * col_scale[j] * orig(row_perm[i], col_perm[j]); + }, + orig->get_size(), row_scale, row_perm, col_scale, col_perm, orig, + permuted); +} + + +template +void inv_nonsymm_scale_permute(std::shared_ptr exec, + const ValueType* row_scale, + const IndexType* row_perm, + const ValueType* col_scale, + const IndexType* col_perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto row_scale, auto row_perm, + auto col_scale, auto col_perm, auto orig, auto permuted) { + permuted(row_perm[i], row_perm[j]) = + orig(i, j) / (row_scale[i] * col_scale[j]); + }, + orig->get_size(), row_scale, row_perm, col_scale, col_perm, orig, + permuted); +} + + +template +void row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, + auto permuted) { + permuted(i, j) = scale[i] * orig(perm[i], j); + }, + orig->get_size(), scale, perm, orig, permuted); +} + + +template +void inv_row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, + auto permuted) { + permuted(perm[i], j) = orig(i, j) / scale[i]; + }, + orig->get_size(), scale, perm, orig, permuted); +} + + +template +void col_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, + auto permuted) { + permuted(i, j) = scale[j] * orig(i, perm[j]); + }, + orig->get_size(), scale, perm, orig, permuted); +} + + +template +void inv_col_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, + auto permuted) { + permuted(i, perm[j]) = orig(i, j) / scale[j]; + }, + orig->get_size(), scale, perm, orig, permuted); } diff --git a/common/unified/matrix/permutation_kernels.cpp b/common/unified/matrix/permutation_kernels.cpp new file mode 100644 index 00000000000..58b82c1602e --- /dev/null +++ b/common/unified/matrix/permutation_kernels.cpp @@ -0,0 +1,67 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/permutation_kernels.hpp" + + +#include + + +#include "common/unified/base/kernel_launch.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +namespace permutation { + + +template +void invert(std::shared_ptr exec, + const IndexType* permutation_indices, size_type size, + IndexType* inv_permutation) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto permutation, auto inv_permutation) { + inv_permutation[permutation[i]] = i; + }, + size, permutation_indices, inv_permutation); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PERMUTATION_INVERT_KERNEL); + + +} // namespace permutation +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/common/unified/matrix/scaled_permutation_kernels.cpp b/common/unified/matrix/scaled_permutation_kernels.cpp new file mode 100644 index 00000000000..7bebe4c4778 --- /dev/null +++ b/common/unified/matrix/scaled_permutation_kernels.cpp @@ -0,0 +1,72 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/scaled_permutation_kernels.hpp" + + +#include + + +#include "common/unified/base/kernel_launch.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +namespace scaled_permutation { + + +template +void invert(std::shared_ptr exec, + const IndexType* input_permutation, const ValueType* input_scale, + size_type size, IndexType* output_permutation, + ValueType* output_scale) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto input_permutation, auto input_scale, + auto output_permutation, auto output_scale) { + output_permutation[input_permutation[i]] = i; + output_scale[input_permutation[i]] = + one(input_scale[i]) / input_scale[i]; + }, + size, input_permutation, input_scale, output_permutation, output_scale); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL); + + +} // namespace scaled_permutation +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index ae8035bcbf9..014a94c0369 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -51,9 +51,10 @@ target_sources(ginkgo matrix/hybrid.cpp matrix/identity.cpp matrix/permutation.cpp + matrix/row_gatherer.cpp + matrix/scaled_permutation.cpp matrix/sellp.cpp matrix/sparsity_csr.cpp - matrix/row_gatherer.cpp multigrid/pgm.cpp multigrid/fixed_coarsening.cpp preconditioner/isai.cpp diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 7f7b1b473a2..3f5d097abac 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -67,6 +67,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/fbcsr_kernels.hpp" #include "core/matrix/fft_kernels.hpp" #include "core/matrix/hybrid_kernels.hpp" +#include "core/matrix/permutation_kernels.hpp" +#include "core/matrix/scaled_permutation_kernels.hpp" #include "core/matrix/sellp_kernels.hpp" #include "core/matrix/sparsity_csr_kernels.hpp" #include "core/multigrid/pgm_kernels.hpp" @@ -372,9 +374,20 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2(GKO_DECLARE_DENSE_ROW_GATHER_KERNEL); GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_COL_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_NONSYMM_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_NONSYMM_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_SYMM_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_SYMM_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_ROW_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_COL_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_ROW_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_COL_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_NONSYMM_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_NONSYMM_SCALE_PERMUTE_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL); @@ -591,11 +604,16 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_CONVERT_TO_HYBRID_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_CONVERT_TO_SELLP_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INV_NONSYMM_PERMUTE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); -GKO_STUB_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INV_COL_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INV_ROW_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INV_NONSYMM_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INV_SYMM_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_ROW_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INV_COL_SCALE_PERMUTE_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_INV_ROW_SCALE_PERMUTE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); @@ -708,6 +726,24 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_HYBRID_CONVERT_TO_CSR_KERNEL); } // namespace hybrid 
+namespace permutation { + + +GKO_STUB_INDEX_TYPE(GKO_DECLARE_PERMUTATION_INVERT_KERNEL); + + +} // namespace permutation + + +namespace scaled_permutation { + + +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL); + + +} // namespace scaled_permutation + + namespace sellp { diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 9a4697c1195..e669f4d4718 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -93,9 +94,15 @@ GKO_REGISTER_OPERATION(transpose, csr::transpose); GKO_REGISTER_OPERATION(conj_transpose, csr::conj_transpose); GKO_REGISTER_OPERATION(inv_symm_permute, csr::inv_symm_permute); GKO_REGISTER_OPERATION(row_permute, csr::row_permute); -GKO_REGISTER_OPERATION(inverse_row_permute, csr::inverse_row_permute); -GKO_REGISTER_OPERATION(inverse_column_permute, csr::inverse_column_permute); -GKO_REGISTER_OPERATION(invert_permutation, csr::invert_permutation); +GKO_REGISTER_OPERATION(inv_row_permute, csr::inv_row_permute); +GKO_REGISTER_OPERATION(inv_col_permute, csr::inv_col_permute); +GKO_REGISTER_OPERATION(inv_nonsymm_permute, csr::inv_nonsymm_permute); +GKO_REGISTER_OPERATION(inv_symm_scale_permute, csr::inv_symm_scale_permute); +GKO_REGISTER_OPERATION(row_scale_permute, csr::row_scale_permute); +GKO_REGISTER_OPERATION(inv_row_scale_permute, csr::inv_row_scale_permute); +GKO_REGISTER_OPERATION(inv_col_scale_permute, csr::inv_col_scale_permute); +GKO_REGISTER_OPERATION(inv_nonsymm_scale_permute, + csr::inv_nonsymm_scale_permute); GKO_REGISTER_OPERATION(convert_ptrs_to_sizes, components::convert_ptrs_to_sizes); GKO_REGISTER_OPERATION(sort_by_column_index, csr::sort_by_column_index); @@ -520,26 +527,226 @@ std::unique_ptr Csr::conj_transpose() const } +template +std::unique_ptr> Csr::permute( + ptr_param> permutation, + permute_mode mode) const +{ + const auto exec = this->get_executor(); + const auto size = this->get_size(); + const auto nnz = this->get_num_stored_elements(); + if ((mode & permute_mode::symmetric) == permute_mode::none) { + return this->clone(); + } + if ((mode & permute_mode::symmetric) == permute_mode::symmetric) { + GKO_ASSERT_IS_SQUARE_MATRIX(this); + } + if ((mode & permute_mode::rows) == permute_mode::rows) { + GKO_ASSERT_EQ(size[0], permutation->get_size()[0]); + } + if ((mode & permute_mode::columns) == permute_mode::columns) { + GKO_ASSERT_EQ(size[1], permutation->get_size()[0]); + } + auto result = Csr::create(exec, size, nnz, this->get_strategy()->copy()); + auto local_permutation = make_temporary_clone(exec, permutation); + std::unique_ptr> inv_permutation; + const auto perm_idxs = local_permutation->get_const_permutation(); + const IndexType* inv_perm_idxs{}; + // to permute columns, we need to know the inverse permutation + bool needs_inverse = + (mode & permute_mode::inverse_columns) == permute_mode::columns; + if (needs_inverse) { + inv_permutation = local_permutation->invert(); + inv_perm_idxs = inv_permutation->get_const_permutation(); + } + switch (mode) { + case permute_mode::rows: + exec->run(csr::make_row_permute(perm_idxs, this, result.get())); + break; + case permute_mode::columns: + exec->run(csr::make_inv_col_permute(inv_perm_idxs, this, result.get())); + break; + case permute_mode::inverse_rows: + exec->run(csr::make_inv_row_permute(perm_idxs, this, result.get())); + break; + case permute_mode::inverse_columns: + exec->run(csr::make_inv_col_permute(perm_idxs, 
this, result.get())); + break; + case permute_mode::symmetric: + exec->run( + csr::make_inv_symm_permute(inv_perm_idxs, this, result.get())); + break; + case permute_mode::inverse_symmetric: + exec->run(csr::make_inv_symm_permute(perm_idxs, this, result.get())); + break; + default: + GKO_ASSERT(false); + } + result->make_srow(); + if ((mode & permute_mode::columns) == permute_mode::columns) { + result->sort_by_column_index(); + } + return result; +} + + +template +std::unique_ptr> Csr::permute( + ptr_param> row_permutation, + ptr_param> col_permutation, bool invert) const +{ + const auto exec = this->get_executor(); + const auto size = this->get_size(); + const auto nnz = this->get_num_stored_elements(); + GKO_ASSERT_EQ(size[0], row_permutation->get_size()[0]); + GKO_ASSERT_EQ(size[1], col_permutation->get_size()[0]); + auto result = Csr::create(exec, size, nnz, this->get_strategy()->copy()); + auto local_row_permutation = make_temporary_clone(exec, row_permutation); + auto local_col_permutation = make_temporary_clone(exec, col_permutation); + if (invert) { + exec->run(csr::make_inv_nonsymm_permute( + local_row_permutation->get_const_permutation(), + local_col_permutation->get_const_permutation(), this, + result.get())); + } else { + const auto inv_row_perm = local_row_permutation->invert(); + const auto inv_col_perm = local_col_permutation->invert(); + exec->run(csr::make_inv_nonsymm_permute( + inv_row_perm->get_const_permutation(), + inv_col_perm->get_const_permutation(), this, result.get())); + } + result->make_srow(); + result->sort_by_column_index(); + return result; +} + + +template +std::unique_ptr> +Csr::scale_permute( + ptr_param> permutation, + permute_mode mode) const +{ + const auto exec = this->get_executor(); + const auto size = this->get_size(); + const auto nnz = this->get_num_stored_elements(); + if ((mode & permute_mode::symmetric) == permute_mode::none) { + return this->clone(); + } + if ((mode & permute_mode::symmetric) == permute_mode::symmetric) { + GKO_ASSERT_IS_SQUARE_MATRIX(this); + } + if ((mode & permute_mode::rows) == permute_mode::rows) { + GKO_ASSERT_EQ(size[0], permutation->get_size()[0]); + } + if ((mode & permute_mode::columns) == permute_mode::columns) { + GKO_ASSERT_EQ(size[1], permutation->get_size()[0]); + } + auto result = Csr::create(exec, size, nnz, this->get_strategy()->copy()); + auto local_permutation = make_temporary_clone(exec, permutation); + std::unique_ptr> + inv_permutation; + const auto perm_idxs = local_permutation->get_const_permutation(); + const auto scale_factors = local_permutation->get_const_scale(); + const ValueType* inv_scale_factors{}; + const IndexType* inv_perm_idxs{}; + // to permute columns, we need to know the inverse permutation + bool needs_inverse = + (mode & permute_mode::inverse_columns) == permute_mode::columns; + if (needs_inverse) { + inv_permutation = local_permutation->invert(); + inv_scale_factors = inv_permutation->get_const_scale(); + inv_perm_idxs = inv_permutation->get_const_permutation(); + } + switch (mode) { + case permute_mode::rows: + exec->run(csr::make_row_scale_permute(scale_factors, perm_idxs, this, + result.get())); + break; + case permute_mode::columns: + exec->run(csr::make_inv_col_scale_permute( + inv_scale_factors, inv_perm_idxs, this, result.get())); + break; + case permute_mode::inverse_rows: + exec->run(csr::make_inv_row_scale_permute(scale_factors, perm_idxs, + this, result.get())); + break; + case permute_mode::inverse_columns: + exec->run(csr::make_inv_col_scale_permute(scale_factors, 
perm_idxs, + this, result.get())); + break; + case permute_mode::symmetric: + exec->run(csr::make_inv_symm_scale_permute( + inv_scale_factors, inv_perm_idxs, this, result.get())); + break; + case permute_mode::inverse_symmetric: + exec->run(csr::make_inv_symm_scale_permute(scale_factors, perm_idxs, + this, result.get())); + break; + default: + GKO_ASSERT(false); + } + result->make_srow(); + if ((mode & permute_mode::columns) == permute_mode::columns) { + result->sort_by_column_index(); + } + return result; +} + + +template +std::unique_ptr> +Csr::scale_permute( + ptr_param> row_permutation, + ptr_param> col_permutation, + bool invert) const +{ + const auto exec = this->get_executor(); + const auto size = this->get_size(); + const auto nnz = this->get_num_stored_elements(); + GKO_ASSERT_EQ(size[0], row_permutation->get_size()[0]); + GKO_ASSERT_EQ(size[1], col_permutation->get_size()[0]); + auto result = Csr::create(exec, size, nnz, this->get_strategy()->copy()); + auto local_row_permutation = make_temporary_clone(exec, row_permutation); + auto local_col_permutation = make_temporary_clone(exec, col_permutation); + if (invert) { + exec->run(csr::make_inv_nonsymm_scale_permute( + local_row_permutation->get_const_scale(), + local_row_permutation->get_const_permutation(), + local_col_permutation->get_const_scale(), + local_col_permutation->get_const_permutation(), this, + result.get())); + } else { + const auto inv_row_perm = local_row_permutation->invert(); + const auto inv_col_perm = local_col_permutation->invert(); + exec->run(csr::make_inv_nonsymm_scale_permute( + inv_row_perm->get_const_scale(), + inv_row_perm->get_const_permutation(), + inv_col_perm->get_const_scale(), + inv_col_perm->get_const_permutation(), this, result.get())); + } + result->make_srow(); + result->sort_by_column_index(); + return result; +} + + +template +std::unique_ptr> create_permutation_view( + const array& indices) +{ + return Permutation::create_const(indices.get_executor(), + indices.get_num_elems(), + indices.as_const_view()); +} + + template std::unique_ptr Csr::permute( const array* permutation_indices) const { - GKO_ASSERT_IS_SQUARE_MATRIX(this); - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); - auto exec = this->get_executor(); - auto permute_cpy = - Csr::create(exec, this->get_size(), this->get_num_stored_elements(), - this->get_strategy()); - array inv_permutation(exec, this->get_size()[1]); - - exec->run(csr::make_invert_permutation( - this->get_size()[1], - make_temporary_clone(exec, permutation_indices)->get_const_data(), - inv_permutation.get_data())); - exec->run(csr::make_inv_symm_permute(inv_permutation.get_const_data(), this, - permute_cpy.get())); - permute_cpy->make_srow(); - return std::move(permute_cpy); + return permute(create_permutation_view(*permutation_indices), + permute_mode::symmetric); } @@ -547,18 +754,8 @@ template std::unique_ptr Csr::inverse_permute( const array* permutation_indices) const { - GKO_ASSERT_IS_SQUARE_MATRIX(this); - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); - auto exec = this->get_executor(); - auto permute_cpy = - Csr::create(exec, this->get_size(), this->get_num_stored_elements(), - this->get_strategy()); - - exec->run(csr::make_inv_symm_permute( - make_temporary_clone(exec, permutation_indices)->get_const_data(), this, - permute_cpy.get())); - permute_cpy->make_srow(); - return std::move(permute_cpy); + return permute(create_permutation_view(*permutation_indices), + permute_mode::inverse_symmetric); } @@ 
-566,17 +763,8 @@ template std::unique_ptr Csr::row_permute( const array* permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); - auto exec = this->get_executor(); - auto permute_cpy = - Csr::create(exec, this->get_size(), this->get_num_stored_elements(), - this->get_strategy()); - - exec->run(csr::make_row_permute( - make_temporary_clone(exec, permutation_indices)->get_const_data(), this, - permute_cpy.get())); - permute_cpy->make_srow(); - return std::move(permute_cpy); + return permute(create_permutation_view(*permutation_indices), + permute_mode::rows); } @@ -584,22 +772,8 @@ template std::unique_ptr Csr::column_permute( const array* permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); - auto exec = this->get_executor(); - auto permute_cpy = - Csr::create(exec, this->get_size(), this->get_num_stored_elements(), - this->get_strategy()); - array inv_permutation(exec, this->get_size()[1]); - - exec->run(csr::make_invert_permutation( - this->get_size()[1], - make_temporary_clone(exec, permutation_indices)->get_const_data(), - inv_permutation.get_data())); - exec->run(csr::make_inverse_column_permute(inv_permutation.get_const_data(), - this, permute_cpy.get())); - permute_cpy->make_srow(); - permute_cpy->sort_by_column_index(); - return std::move(permute_cpy); + return permute(create_permutation_view(*permutation_indices), + permute_mode::columns); } @@ -607,17 +781,8 @@ template std::unique_ptr Csr::inverse_row_permute( const array* permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); - auto exec = this->get_executor(); - auto inverse_permute_cpy = - Csr::create(exec, this->get_size(), this->get_num_stored_elements(), - this->get_strategy()); - - exec->run(csr::make_inverse_row_permute( - make_temporary_clone(exec, permutation_indices)->get_const_data(), this, - inverse_permute_cpy.get())); - inverse_permute_cpy->make_srow(); - return std::move(inverse_permute_cpy); + return permute(create_permutation_view(*permutation_indices), + permute_mode::inverse_rows); } @@ -625,18 +790,8 @@ template std::unique_ptr Csr::inverse_column_permute( const array* permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); - auto exec = this->get_executor(); - auto inverse_permute_cpy = - Csr::create(exec, this->get_size(), this->get_num_stored_elements(), - this->get_strategy()); - - exec->run(csr::make_inverse_column_permute( - make_temporary_clone(exec, permutation_indices)->get_const_data(), this, - inverse_permute_cpy.get())); - inverse_permute_cpy->make_srow(); - inverse_permute_cpy->sort_by_column_index(); - return std::move(inverse_permute_cpy); + return permute(create_permutation_view(*permutation_indices), + permute_mode::inverse_columns); } diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 42a92ca1b84..26d80f93b8b 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -146,23 +146,61 @@ namespace kernels { const matrix::Csr* orig, \ matrix::Csr* row_permuted) -#define GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ - void inverse_row_permute(std::shared_ptr exec, \ - const IndexType* permutation_indices, \ +#define GKO_DECLARE_CSR_INV_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_row_permute(std::shared_ptr exec, \ + const IndexType* permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* row_permuted) + +#define 
GKO_DECLARE_CSR_INV_COL_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_col_permute(std::shared_ptr exec, \ + const IndexType* permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* col_permuted) + +#define GKO_DECLARE_CSR_INV_NONSYMM_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_nonsymm_permute(std::shared_ptr exec, \ + const IndexType* row_permutation_indices, \ + const IndexType* column_permutation_indices, \ const matrix::Csr* orig, \ - matrix::Csr* row_permuted) - -#define GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ - void inverse_column_permute( \ - std::shared_ptr exec, \ - const IndexType* permutation_indices, \ - const matrix::Csr* orig, \ - matrix::Csr* column_permuted) - -#define GKO_DECLARE_INVERT_PERMUTATION_KERNEL(IndexType) \ - void invert_permutation( \ - std::shared_ptr exec, size_type size, \ - const IndexType* permutation_indices, IndexType* inv_permutation) + matrix::Csr* permuted) + +#define GKO_DECLARE_CSR_INV_SYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_symm_scale_permute(std::shared_ptr exec, \ + const ValueType* scale, \ + const IndexType* permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* permuted) + +#define GKO_DECLARE_CSR_ROW_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void row_scale_permute(std::shared_ptr exec, \ + const ValueType* scale, \ + const IndexType* permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* row_permuted) + +#define GKO_DECLARE_CSR_INV_ROW_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_row_scale_permute( \ + std::shared_ptr exec, const ValueType* scale, \ + const IndexType* permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* row_permuted) + +#define GKO_DECLARE_CSR_INV_COL_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_col_scale_permute( \ + std::shared_ptr exec, const ValueType* scale, \ + const IndexType* permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* col_permuted) + +#define GKO_DECLARE_CSR_INV_NONSYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_nonsymm_scale_permute( \ + std::shared_ptr exec, \ + const ValueType* row_scale, const IndexType* row_permutation_indices, \ + const ValueType* column_scale, \ + const IndexType* column_permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* col_permuted) #define GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType) \ void calculate_nonzeros_per_row_in_span( \ @@ -251,74 +289,84 @@ namespace kernels { IndexType sample_size, IndexType* result) -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_CSR_SPMV_KERNEL(MatrixValueType, InputValueType, \ - OutputValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL(MatrixValueType, InputValueType, \ - OutputValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_SPGEAM_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_CONVERT_TO_ELL_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_TRANSPOSE_KERNEL(ValueType, IndexType); \ - template \ - 
GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_INVERT_PERMUTATION_KERNEL(IndexType); \ - template \ - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL(ValueType, \ - IndexType); \ - template \ - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL(ValueType, \ - IndexType); \ - template \ - GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_EXTRACT_DIAGONAL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_SCALE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_INV_SCALE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_CSR_BUILD_LOOKUP_OFFSETS_KERNEL(IndexType); \ - template \ - GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL(IndexType); \ - template \ +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_CSR_SPMV_KERNEL(MatrixValueType, InputValueType, \ + OutputValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL(MatrixValueType, InputValueType, \ + OutputValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_SPGEAM_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_CONVERT_TO_ELL_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_TRANSPOSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_INV_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_INV_COL_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_INV_NONSYMM_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_INV_SYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_ROW_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_INV_ROW_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_INV_COL_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_INV_NONSYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL(ValueType, IndexType); \ + template 
\ + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL(ValueType, \ + IndexType); \ + template \ + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL(ValueType, \ + IndexType); \ + template \ + GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_EXTRACT_DIAGONAL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_SCALE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_INV_SCALE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_CSR_BUILD_LOOKUP_OFFSETS_KERNEL(IndexType); \ + template \ + GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL(IndexType); \ + template \ GKO_DECLARE_CSR_BENCHMARK_LOOKUP_KERNEL(IndexType) diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 9f7dff96aab..b263357dc9b 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -50,6 +50,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include +#include #include #include @@ -58,6 +60,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/matrix/hybrid_kernels.hpp" +#include "ginkgo/core/base/temporary_clone.hpp" namespace gko { @@ -96,11 +99,22 @@ GKO_REGISTER_OPERATION(transpose, dense::transpose); GKO_REGISTER_OPERATION(conj_transpose, dense::conj_transpose); GKO_REGISTER_OPERATION(symm_permute, dense::symm_permute); GKO_REGISTER_OPERATION(inv_symm_permute, dense::inv_symm_permute); +GKO_REGISTER_OPERATION(nonsymm_permute, dense::nonsymm_permute); +GKO_REGISTER_OPERATION(inv_nonsymm_permute, dense::inv_nonsymm_permute); GKO_REGISTER_OPERATION(row_gather, dense::row_gather); GKO_REGISTER_OPERATION(advanced_row_gather, dense::advanced_row_gather); -GKO_REGISTER_OPERATION(column_permute, dense::column_permute); -GKO_REGISTER_OPERATION(inverse_row_permute, dense::inverse_row_permute); -GKO_REGISTER_OPERATION(inverse_column_permute, dense::inverse_column_permute); +GKO_REGISTER_OPERATION(col_permute, dense::col_permute); +GKO_REGISTER_OPERATION(inverse_row_permute, dense::inv_row_permute); +GKO_REGISTER_OPERATION(inverse_col_permute, dense::inv_col_permute); +GKO_REGISTER_OPERATION(symm_scale_permute, dense::symm_scale_permute); +GKO_REGISTER_OPERATION(inv_symm_scale_permute, dense::inv_symm_scale_permute); +GKO_REGISTER_OPERATION(nonsymm_scale_permute, dense::nonsymm_scale_permute); +GKO_REGISTER_OPERATION(inv_nonsymm_scale_permute, + dense::inv_nonsymm_scale_permute); +GKO_REGISTER_OPERATION(row_scale_permute, dense::row_scale_permute); +GKO_REGISTER_OPERATION(col_scale_permute, dense::col_scale_permute); +GKO_REGISTER_OPERATION(inv_row_scale_permute, dense::inv_row_scale_permute); +GKO_REGISTER_OPERATION(inv_col_scale_permute, dense::inv_col_scale_permute); GKO_REGISTER_OPERATION(fill_in_matrix_data, dense::fill_in_matrix_data); GKO_REGISTER_OPERATION(convert_to_coo, dense::convert_to_coo); GKO_REGISTER_OPERATION(convert_to_csr, dense::convert_to_csr); @@ -1113,48 +1127,174 @@ void Dense::conj_transpose(ptr_param> output) const template template -void Dense::permute_impl(const array* permutation_indices, - Dense* output) const +void Dense::permute_impl(const Permutation* permutation, + permute_mode mode, Dense* output) 
const { - GKO_ASSERT_IS_SQUARE_MATRIX(this); + const auto exec = this->get_executor(); + const auto size = this->get_size(); GKO_ASSERT_EQUAL_DIMENSIONS(this, output); - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); - auto exec = this->get_executor(); - - exec->run(dense::make_symm_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - make_temporary_output_clone(exec, output).get())); + if ((mode & permute_mode::symmetric) == permute_mode::none) { + output->copy_from(this); + return; + } + if ((mode & permute_mode::symmetric) == permute_mode::symmetric) { + GKO_ASSERT_IS_SQUARE_MATRIX(this); + } + if ((mode & permute_mode::rows) == permute_mode::rows) { + GKO_ASSERT_EQ(size[0], permutation->get_size()[0]); + } + if ((mode & permute_mode::columns) == permute_mode::columns) { + GKO_ASSERT_EQ(size[1], permutation->get_size()[0]); + } + auto local_output = make_temporary_output_clone(exec, output); + auto local_perm = make_temporary_clone(exec, permutation); + switch (mode) { + case permute_mode::rows: + exec->run(dense::make_row_gather(local_perm->get_const_permutation(), + this, local_output.get())); + break; + case permute_mode::columns: + exec->run(dense::make_col_permute(local_perm->get_const_permutation(), + this, local_output.get())); + break; + case permute_mode::symmetric: + exec->run(dense::make_symm_permute(local_perm->get_const_permutation(), + this, local_output.get())); + break; + case permute_mode::inverse_rows: + exec->run(dense::make_inverse_row_permute( + local_perm->get_const_permutation(), this, local_output.get())); + break; + case permute_mode::inverse_columns: + exec->run(dense::make_inverse_col_permute( + local_perm->get_const_permutation(), this, local_output.get())); + break; + case permute_mode::inverse_symmetric: + exec->run(dense::make_inv_symm_permute( + local_perm->get_const_permutation(), this, local_output.get())); + break; + default: + GKO_ASSERT(false); // cannot happen + } } template template -void Dense::inverse_permute_impl( - const array* permutation_indices, Dense* output) const +void Dense::permute_impl( + const Permutation* row_permutation, + const Permutation* col_permutation, bool invert, + Dense* output) const { - GKO_ASSERT_IS_SQUARE_MATRIX(this); - GKO_ASSERT_EQUAL_DIMENSIONS(this, output); - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); - - exec->run(dense::make_inv_symm_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - make_temporary_output_clone(exec, output).get())); + auto size = this->get_size(); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); + GKO_ASSERT_EQ(size[0], row_permutation->get_size()[0]); + GKO_ASSERT_EQ(size[1], col_permutation->get_size()[0]); + auto local_output = make_temporary_output_clone(exec, output); + auto local_row_perm = make_temporary_clone(exec, row_permutation); + auto local_col_perm = make_temporary_clone(exec, col_permutation); + if (invert) { + exec->run(dense::make_inv_nonsymm_permute( + local_row_perm->get_const_permutation(), + local_col_perm->get_const_permutation(), this, local_output.get())); + } else { + exec->run(dense::make_nonsymm_permute( + local_row_perm->get_const_permutation(), + local_col_perm->get_const_permutation(), this, local_output.get())); + } } template template -void Dense::row_permute_impl( - const array* permutation_indices, Dense* output) const +void Dense::scale_permute_impl( + const ScaledPermutation* permutation, + permute_mode mode, Dense* output) const 
{ - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + const auto exec = this->get_executor(); + const auto size = this->get_size(); GKO_ASSERT_EQUAL_DIMENSIONS(this, output); - auto exec = this->get_executor(); + if ((mode & permute_mode::symmetric) == permute_mode::none) { + output->copy_from(this); + return; + } + if ((mode & permute_mode::symmetric) == permute_mode::symmetric) { + GKO_ASSERT_IS_SQUARE_MATRIX(this); + } + if ((mode & permute_mode::rows) == permute_mode::rows) { + GKO_ASSERT_EQ(size[0], permutation->get_size()[0]); + } + if ((mode & permute_mode::columns) == permute_mode::columns) { + GKO_ASSERT_EQ(size[1], permutation->get_size()[0]); + } + auto local_output = make_temporary_output_clone(exec, output); + auto local_perm = make_temporary_clone(exec, permutation); + switch (mode) { + case permute_mode::rows: + exec->run(dense::make_row_scale_permute( + local_perm->get_const_scale(), local_perm->get_const_permutation(), + this, local_output.get())); + break; + case permute_mode::columns: + exec->run(dense::make_col_scale_permute( + local_perm->get_const_scale(), local_perm->get_const_permutation(), + this, local_output.get())); + break; + case permute_mode::symmetric: + exec->run(dense::make_symm_scale_permute( + local_perm->get_const_scale(), local_perm->get_const_permutation(), + this, local_output.get())); + break; + case permute_mode::inverse_rows: + exec->run(dense::make_inv_row_scale_permute( + local_perm->get_const_scale(), local_perm->get_const_permutation(), + this, local_output.get())); + break; + case permute_mode::inverse_columns: + exec->run(dense::make_inv_col_scale_permute( + local_perm->get_const_scale(), local_perm->get_const_permutation(), + this, local_output.get())); + break; + case permute_mode::inverse_symmetric: + exec->run(dense::make_inv_symm_scale_permute( + local_perm->get_const_scale(), local_perm->get_const_permutation(), + this, local_output.get())); + break; + default: + GKO_ASSERT(false); // cannot happen + } +} - exec->run(dense::make_row_gather( - make_temporary_clone(exec, permutation_indices).get(), this, - make_temporary_output_clone(exec, output).get())); + +template +template +void Dense::scale_permute_impl( + const ScaledPermutation* row_permutation, + const ScaledPermutation* col_permutation, bool invert, + Dense* output) const +{ + auto exec = this->get_executor(); + auto size = this->get_size(); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); + GKO_ASSERT_EQ(size[0], row_permutation->get_size()[0]); + GKO_ASSERT_EQ(size[1], col_permutation->get_size()[0]); + auto local_output = make_temporary_output_clone(exec, output); + auto local_row_perm = make_temporary_clone(exec, row_permutation); + auto local_col_perm = make_temporary_clone(exec, col_permutation); + if (invert) { + exec->run(dense::make_inv_nonsymm_scale_permute( + local_row_perm->get_const_scale(), + local_row_perm->get_const_permutation(), + local_col_perm->get_const_scale(), + local_col_perm->get_const_permutation(), this, local_output.get())); + } else { + exec->run(dense::make_nonsymm_scale_permute( + local_row_perm->get_const_scale(), + local_row_perm->get_const_permutation(), + local_col_perm->get_const_scale(), + local_col_perm->get_const_permutation(), this, local_output.get())); + } } @@ -1168,7 +1308,7 @@ void Dense::row_gather_impl(const array* row_idxs, GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, row_collection); exec->run(dense::make_row_gather( - make_temporary_clone(exec, row_idxs).get(), this, + make_temporary_clone(exec, 
row_idxs)->get_const_data(), this, make_temporary_output_clone(exec, row_collection).get())); } @@ -1185,82 +1325,130 @@ void Dense::row_gather_impl(const Dense* alpha, exec->run(dense::make_advanced_row_gather( make_temporary_clone(exec, alpha).get(), - make_temporary_clone(exec, row_idxs).get(), this, + make_temporary_clone(exec, row_idxs)->get_const_data(), this, make_temporary_clone(exec, beta).get(), make_temporary_clone(exec, row_collection).get())); } template -template -void Dense::column_permute_impl( - const array* permutation_indices, Dense* output) const +std::unique_ptr Dense::permute( + const array* permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); - GKO_ASSERT_EQUAL_DIMENSIONS(this, output); - auto exec = this->get_executor(); - - exec->run(dense::make_column_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - make_temporary_output_clone(exec, output).get())); + auto result = Dense::create(this->get_executor(), this->get_size()); + this->permute(permutation_indices, result); + return result; } template -template -void Dense::inverse_row_permute_impl( - const array* permutation_indices, Dense* output) const +std::unique_ptr Dense::permute( + const array* permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); - GKO_ASSERT_EQUAL_DIMENSIONS(this, output); - auto exec = this->get_executor(); - - exec->run(dense::make_inverse_row_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - make_temporary_output_clone(exec, output).get())); + auto result = Dense::create(this->get_executor(), this->get_size()); + this->permute(permutation_indices, result); + return result; } template -template -void Dense::inverse_column_permute_impl( - const array* permutation_indices, Dense* output) const +std::unique_ptr> Dense::permute( + ptr_param> permutation, permute_mode mode) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); - GKO_ASSERT_EQUAL_DIMENSIONS(this, output); - auto exec = this->get_executor(); + auto result = Dense::create(this->get_executor(), this->get_size()); + this->permute(permutation, result, mode); + return result; +} + - exec->run(dense::make_inverse_column_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - make_temporary_output_clone(exec, output).get())); +template +std::unique_ptr> Dense::permute( + ptr_param> permutation, permute_mode mode) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->permute(permutation, result, mode); + return result; } template -std::unique_ptr Dense::permute( - const array* permutation_indices) const +std::unique_ptr> Dense::permute( + ptr_param> row_permutation, + ptr_param> col_permutation, bool invert) const { auto result = Dense::create(this->get_executor(), this->get_size()); - this->permute(permutation_indices, result); + this->permute(row_permutation, col_permutation, result, invert); return result; } template -std::unique_ptr Dense::permute( - const array* permutation_indices) const +std::unique_ptr> Dense::permute( + ptr_param> row_permutation, + ptr_param> col_permutation, bool invert) const { auto result = Dense::create(this->get_executor(), this->get_size()); - this->permute(permutation_indices, result); + this->permute(row_permutation, col_permutation, result, invert); return result; } +template +void Dense::permute(ptr_param> permutation, + ptr_param> result, + permute_mode mode) const +{ + 
this->permute_impl(permutation.get(), mode, result.get()); +} + + +template +void Dense::permute(ptr_param> permutation, + ptr_param> result, + permute_mode mode) const +{ + this->permute_impl(permutation.get(), mode, result.get()); +} + + +template +void Dense::permute( + ptr_param> row_permutation, + ptr_param> col_permutation, + ptr_param> result, bool invert) const +{ + this->permute_impl(row_permutation.get(), col_permutation.get(), invert, + result.get()); +} + + +template +void Dense::permute( + ptr_param> row_permutation, + ptr_param> col_permutation, + ptr_param> result, bool invert) const +{ + this->permute_impl(row_permutation.get(), col_permutation.get(), invert, + result.get()); +} + + +template +std::unique_ptr> create_permutation_view( + const array& indices) +{ + return Permutation::create_const(indices.get_executor(), + indices.get_num_elems(), + indices.as_const_view()); +} + + template void Dense::permute(const array* permutation_indices, ptr_param> output) const { - this->permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::symmetric, output.get()); } @@ -1268,7 +1456,8 @@ template void Dense::permute(const array* permutation_indices, ptr_param> output) const { - this->permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::symmetric, output.get()); } @@ -1296,7 +1485,8 @@ template void Dense::inverse_permute(const array* permutation_indices, ptr_param> output) const { - this->inverse_permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::inverse_symmetric, output.get()); } @@ -1304,7 +1494,8 @@ template void Dense::inverse_permute(const array* permutation_indices, ptr_param> output) const { - this->inverse_permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::inverse_symmetric, output.get()); } @@ -1332,7 +1523,8 @@ template void Dense::row_permute(const array* permutation_indices, ptr_param> output) const { - this->row_permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::rows, output.get()); } @@ -1340,7 +1532,8 @@ template void Dense::row_permute(const array* permutation_indices, ptr_param> output) const { - this->row_permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::rows, output.get()); } @@ -1467,7 +1660,8 @@ template void Dense::column_permute(const array* permutation_indices, ptr_param> output) const { - this->column_permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::columns, output.get()); } @@ -1475,7 +1669,8 @@ template void Dense::column_permute(const array* permutation_indices, ptr_param> output) const { - this->column_permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::columns, output.get()); } @@ -1504,7 +1699,8 @@ void Dense::inverse_row_permute( const array* permutation_indices, ptr_param> output) const { - this->inverse_row_permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::inverse_rows, 
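// Illustrative sketch of the Permutation-based Dense::permute overloads
// defined above; a minimal usage example, not part of the diff. It assumes
// gko::matrix::permute_mode and the Permutation<int32> factory shown in the
// test changes of this patch series, plus the umbrella header ginkgo/ginkgo.h.
#include <ginkgo/ginkgo.h>

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    auto mtx = gko::initialize<gko::matrix::Dense<double>>(
        {{1.0, 2.0}, {3.0, 4.0}}, exec);
    auto perm = gko::matrix::Permutation<gko::int32>::create(
        exec, gko::dim<2>{2, 2}, gko::array<gko::int32>{exec, {1, 0}});
    // Single permutation plus a mode: rows, columns, symmetric or one of the
    // inverse_* values handled by the switch in permute_impl above.
    auto rows_swapped = mtx->permute(perm, gko::matrix::permute_mode::rows);
    // Separate row and column permutations; the flag requests the inverse.
    auto both = mtx->permute(perm, perm, /* invert = */ false);
}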
output.get()); } @@ -1513,7 +1709,8 @@ void Dense::inverse_row_permute( const array* permutation_indices, ptr_param> output) const { - this->inverse_row_permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::inverse_rows, output.get()); } @@ -1542,7 +1739,8 @@ void Dense::inverse_column_permute( const array* permutation_indices, ptr_param> output) const { - this->inverse_column_permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::inverse_columns, output.get()); } @@ -1551,7 +1749,94 @@ void Dense::inverse_column_permute( const array* permutation_indices, ptr_param> output) const { - this->inverse_column_permute_impl(permutation_indices, output.get()); + this->permute_impl(create_permutation_view(*permutation_indices).get(), + permute_mode::inverse_columns, output.get()); +} + + +template +std::unique_ptr> Dense::scale_permute( + ptr_param> permutation, + permute_mode mode) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->scale_permute(permutation, result, mode); + return result; +} + + +template +std::unique_ptr> Dense::scale_permute( + ptr_param> permutation, + permute_mode mode) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->scale_permute(permutation, result, mode); + return result; +} + + +template +void Dense::scale_permute( + ptr_param> permutation, + ptr_param output, permute_mode mode) const +{ + this->scale_permute_impl(permutation.get(), mode, output.get()); +} + + +template +void Dense::scale_permute( + ptr_param> permutation, + ptr_param output, permute_mode mode) const +{ + this->scale_permute_impl(permutation.get(), mode, output.get()); +} + + +template +std::unique_ptr> Dense::scale_permute( + ptr_param> row_permutation, + ptr_param> col_permutation, + bool invert) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->scale_permute(row_permutation, col_permutation, result, invert); + return result; +} + + +template +std::unique_ptr> Dense::scale_permute( + ptr_param> row_permutation, + ptr_param> col_permutation, + bool invert) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->scale_permute(row_permutation, col_permutation, result, invert); + return result; +} + + +template +void Dense::scale_permute( + ptr_param> row_permutation, + ptr_param> col_permutation, + ptr_param output, bool invert) const +{ + this->scale_permute_impl(row_permutation.get(), col_permutation.get(), + invert, output.get()); +} + + +template +void Dense::scale_permute( + ptr_param> row_permutation, + ptr_param> col_permutation, + ptr_param output, bool invert) const +{ + this->scale_permute_impl(row_permutation.get(), col_permutation.get(), + invert, output.get()); } diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index a352aa8d7c1..f315a393712 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -237,50 +237,112 @@ namespace kernels { const matrix::Dense<_type>* orig, \ matrix::Dense<_type>* trans) +#define GKO_DECLARE_DENSE_SYMM_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ + void symm_scale_permute( \ + std::shared_ptr exec, const _vtype* scale, \ + const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* permuted) + +#define GKO_DECLARE_DENSE_ROW_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ + void 
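// Illustrative sketch of the new Dense::scale_permute overloads defined
// above; a minimal usage example, not part of the diff. The
// ScaledPermutation<double, int32>::create(exec, scale, perm) factory is
// assumed from its use inside ScaledPermutation::invert() later in this patch.
#include <ginkgo/ginkgo.h>

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    auto vec = gko::initialize<gko::matrix::Dense<double>>(
        {1.0, 2.0, 3.0}, exec);
    auto sp = gko::matrix::ScaledPermutation<double, gko::int32>::create(
        exec, gko::array<double>{exec, {2.0, 0.5, 1.0}},
        gko::array<gko::int32>{exec, {1, 0, 2}});
    // Applies the scaling and the row permutation in a single pass; the other
    // permute_mode values select column or symmetric scaled permutation,
    // given matching dimensions.
    auto scaled = vec->scale_permute(sp, gko::matrix::permute_mode::rows);
}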
row_scale_permute( \ + std::shared_ptr exec, const _vtype* scale, \ + const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* permuted) + +#define GKO_DECLARE_DENSE_COL_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ + void col_scale_permute( \ + std::shared_ptr exec, const _vtype* scale, \ + const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* permuted) + +#define GKO_DECLARE_DENSE_INV_SYMM_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ + void inv_symm_scale_permute( \ + std::shared_ptr exec, const _vtype* scale, \ + const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* permuted) + +#define GKO_DECLARE_DENSE_INV_ROW_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ + void inv_row_scale_permute( \ + std::shared_ptr exec, const _vtype* scale, \ + const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* permuted) + +#define GKO_DECLARE_DENSE_INV_COL_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ + void inv_col_scale_permute( \ + std::shared_ptr exec, const _vtype* scale, \ + const _itype* permutation_indices, const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* permuted) + +#define GKO_DECLARE_DENSE_NONSYMM_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ + void nonsymm_scale_permute( \ + std::shared_ptr exec, const _vtype* row_scale, \ + const _itype* row_permutation_indices, const _vtype* column_scale, \ + const _itype* column_permutation_indices, \ + const matrix::Dense<_vtype>* orig, matrix::Dense<_vtype>* permuted) + +#define GKO_DECLARE_DENSE_INV_NONSYMM_SCALE_PERMUTE_KERNEL(_vtype, _itype) \ + void inv_nonsymm_scale_permute( \ + std::shared_ptr exec, const _vtype* row_scale, \ + const _itype* row_permutation_indices, const _vtype* column_scale, \ + const _itype* column_permutation_indices, \ + const matrix::Dense<_vtype>* orig, matrix::Dense<_vtype>* permuted) + #define GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(_vtype, _itype) \ void symm_permute(std::shared_ptr exec, \ - const array<_itype>* permutation_indices, \ + const _itype* permutation_indices, \ const matrix::Dense<_vtype>* orig, \ matrix::Dense<_vtype>* permuted) #define GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(_vtype, _itype) \ void inv_symm_permute(std::shared_ptr exec, \ - const array<_itype>* permutation_indices, \ + const _itype* permutation_indices, \ const matrix::Dense<_vtype>* orig, \ matrix::Dense<_vtype>* permuted) +#define GKO_DECLARE_DENSE_NONSYMM_PERMUTE_KERNEL(_vtype, _itype) \ + void nonsymm_permute(std::shared_ptr exec, \ + const _itype* row_permutation_indices, \ + const _itype* column_permutation_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* permuted) + +#define GKO_DECLARE_DENSE_INV_NONSYMM_PERMUTE_KERNEL(_vtype, _itype) \ + void inv_nonsymm_permute(std::shared_ptr exec, \ + const _itype* row_permutation_indices, \ + const _itype* column_permutation_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* permuted) + #define GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(_vtype, _otype, _itype) \ void row_gather(std::shared_ptr exec, \ - const array<_itype>* gather_indices, \ + const _itype* gather_indices, \ const matrix::Dense<_vtype>* orig, \ matrix::Dense<_otype>* row_collection) - -#define GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL(_vtype, _otype, _itype) \ - void advanced_row_gather(std::shared_ptr exec, \ - const matrix::Dense<_vtype>* alpha, \ - const array<_itype>* gather_indices, \ - const matrix::Dense<_vtype>* orig, \ - const 
matrix::Dense<_vtype>* beta, \ - matrix::Dense<_otype>* row_collection) - -#define GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \ - void column_permute(std::shared_ptr exec, \ - const array<_itype>* permutation_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* column_permuted) - -#define GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL(_vtype, _itype) \ - void inverse_row_permute(std::shared_ptr exec, \ - const array<_itype>* permutation_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* row_permuted) - -#define GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \ - void inverse_column_permute(std::shared_ptr exec, \ - const array<_itype>* permutation_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_vtype>* column_permuted) +#define GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL(_vtype, _otype, _itype) \ + void advanced_row_gather( \ + std::shared_ptr exec, \ + const matrix::Dense<_vtype>* alpha, const _itype* gather_indices, \ + const matrix::Dense<_vtype>* orig, const matrix::Dense<_vtype>* beta, \ + matrix::Dense<_otype>* row_collection) + +#define GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(_vtype, _itype) \ + void col_permute(std::shared_ptr exec, \ + const _itype* permutation_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* col_permuted) + +#define GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL(_vtype, _itype) \ + void inv_row_permute(std::shared_ptr exec, \ + const _itype* permutation_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* row_permuted) + +#define GKO_DECLARE_DENSE_INV_COL_PERMUTE_KERNEL(_vtype, _itype) \ + void inv_col_permute(std::shared_ptr exec, \ + const _itype* permutation_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* col_permuted) #define GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL(_vtype) \ void extract_diagonal(std::shared_ptr exec, \ @@ -319,104 +381,124 @@ namespace kernels { matrix::Dense<_vtype>* mtx) -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_APPLY_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COPY_KERNEL(InValueType, OutValueType); \ - template \ - GKO_DECLARE_DENSE_FILL_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_SCALE_KERNEL(ValueType, ScalarType); \ - template \ - GKO_DECLARE_DENSE_INV_SCALE_KERNEL(ValueType, ScalarType); \ - template \ - GKO_DECLARE_DENSE_ADD_SCALED_KERNEL(ValueType, ScalarType); \ - template \ - GKO_DECLARE_DENSE_SUB_SCALED_KERNEL(ValueType, ScalarType); \ - template \ - GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_MEAN_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL(ValueType); \ - 
template \ - GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_CONVERT_TO_CSR_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_CONVERT_TO_ELL_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_CONVERT_TO_FBCSR_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_CONVERT_TO_SPARSITY_CSR_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_MAX_NNZ_PER_ROW_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COMPUTE_SLICE_SETS_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_COUNT_NONZERO_BLOCKS_PER_ROW_KERNEL(ValueType, \ - IndexType); \ - template \ - GKO_DECLARE_DENSE_TRANSPOSE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(ValueType, OutputType, IndexType); \ - template \ - GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL(ValueType, OutputType, \ - IndexType); \ - template \ - GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL(ValueType); \ - template \ - GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_MAKE_COMPLEX_KERNEL(ValueType); \ - template \ - GKO_DECLARE_GET_REAL_KERNEL(ValueType); \ - template \ - GKO_DECLARE_GET_IMAG_KERNEL(ValueType); \ - template \ +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_APPLY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COPY_KERNEL(InValueType, OutValueType); \ + template \ + GKO_DECLARE_DENSE_FILL_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_SCALE_KERNEL(ValueType, ScalarType); \ + template \ + GKO_DECLARE_DENSE_INV_SCALE_KERNEL(ValueType, ScalarType); \ + template \ + GKO_DECLARE_DENSE_ADD_SCALED_KERNEL(ValueType, ScalarType); \ + template \ + GKO_DECLARE_DENSE_SUB_SCALED_KERNEL(ValueType, ScalarType); \ + template \ + GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_MEAN_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_SQUARED_NORM2_KERNEL(ValueType); \ + template \ + 
GKO_DECLARE_DENSE_COMPUTE_SQRT_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_CONVERT_TO_CSR_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_CONVERT_TO_ELL_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_CONVERT_TO_FBCSR_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_CONVERT_TO_SPARSITY_CSR_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_MAX_NNZ_PER_ROW_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COMPUTE_SLICE_SETS_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COUNT_NONZEROS_PER_ROW_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_COUNT_NONZERO_BLOCKS_PER_ROW_KERNEL(ValueType, \ + IndexType); \ + template \ + GKO_DECLARE_DENSE_TRANSPOSE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_NONSYMM_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_INV_NONSYMM_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(ValueType, OutputType, IndexType); \ + template \ + GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL(ValueType, OutputType, \ + IndexType); \ + template \ + GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_INV_COL_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_SYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_INV_SYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_ROW_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_COL_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_INV_ROW_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_INV_COL_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_NONSYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_INV_NONSYMM_SCALE_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL(ValueType); \ + template \ + GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_MAKE_COMPLEX_KERNEL(ValueType); \ + template \ + GKO_DECLARE_GET_REAL_KERNEL(ValueType); \ + template \ + GKO_DECLARE_GET_IMAG_KERNEL(ValueType); \ + template \ GKO_DECLARE_DENSE_ADD_SCALED_IDENTITY_KERNEL(ValueType, ScalarType) diff --git a/core/matrix/permutation.cpp b/core/matrix/permutation.cpp index a641834f12c..cc58ced53d2 100644 --- a/core/matrix/permutation.cpp +++ b/core/matrix/permutation.cpp @@ -31,10 +31,48 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ #include +#include "core/matrix/permutation_kernels.hpp" +#include "ginkgo/core/base/executor.hpp" namespace gko { namespace matrix { +namespace permutation { + + +GKO_REGISTER_OPERATION(invert, permutation::invert); + + +} + + +template +std::unique_ptr> Permutation::invert() const +{ + const auto exec = this->get_executor(); + const auto size = this->get_size()[0]; + array inv_permutation{exec, size}; + exec->run(permutation::make_invert(this->get_const_permutation(), size, + inv_permutation.get_data())); + return Permutation::create(exec, dim<2>{size, size}, + std::move(inv_permutation)); +} + + +template +void Permutation::write( + gko::matrix_data& data) const +{ + const auto host_this = + make_temporary_clone(this->get_executor()->get_master(), this); + data.size = this->get_size(); + data.nonzeros.clear(); + data.nonzeros.reserve(data.size[0]); + for (IndexType row = 0; row < this->get_size()[0]; row++) { + data.nonzeros.emplace_back(row, host_this->get_const_permutation()[row], + 1.0); + } +} #define GKO_DECLARE_PERMUTATION_MATRIX(_type) class Permutation<_type> diff --git a/core/matrix/permutation_kernels.hpp b/core/matrix/permutation_kernels.hpp new file mode 100644 index 00000000000..a77e0c2f618 --- /dev/null +++ b/core/matrix/permutation_kernels.hpp @@ -0,0 +1,82 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
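// Illustrative sketch of the new Permutation::invert() defined above; a
// minimal usage example, not part of the diff. The invert kernel is assumed
// to produce the usual inverse permutation, i.e. inv[perm[i]] = i; the
// write() overload added above emits one entry (row, perm[row], 1.0) per row.
#include <ginkgo/ginkgo.h>

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    auto perm = gko::matrix::Permutation<gko::int32>::create(
        exec, gko::dim<2>{3, 3}, gko::array<gko::int32>{exec, {2, 0, 1}});
    auto inv = perm->invert();
    // Under the assumption above, inv now holds the indices {1, 2, 0}.
}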
+*************************************************************/ + +#ifndef GKO_CORE_MATRIX_PERMUTATION_KERNELS_HPP_ +#define GKO_CORE_MATRIX_PERMUTATION_KERNELS_HPP_ + + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "core/base/kernel_declaration.hpp" +#include "core/matrix/csr_lookup.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_PERMUTATION_INVERT_KERNEL(IndexType) \ + void invert(std::shared_ptr exec, \ + const IndexType* permutation_indices, size_type size, \ + IndexType* inv_permutation) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_PERMUTATION_INVERT_KERNEL(IndexType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(permutation, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_PERMUTATION_KERNELS_HPP_ diff --git a/core/matrix/scaled_permutation.cpp b/core/matrix/scaled_permutation.cpp new file mode 100644 index 00000000000..d1ce00b521a --- /dev/null +++ b/core/matrix/scaled_permutation.cpp @@ -0,0 +1,142 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include +#include "core/matrix/scaled_permutation_kernels.hpp" +#include "ginkgo/core/base/executor.hpp" +#include "ginkgo/core/base/precision_dispatch.hpp" + + +namespace gko { +namespace matrix { +namespace scaled_permutation { +namespace { + + +GKO_REGISTER_OPERATION(invert, scaled_permutation::invert); + + +} // namespace +} // namespace scaled_permutation + + +template +ScaledPermutation::ScaledPermutation( + std::shared_ptr exec, size_type size) + : ScaledPermutation{exec, array{exec, size}, + array{exec, size}} +{} + + +template +ScaledPermutation::ScaledPermutation( + std::shared_ptr exec, array scaling_factors, + array permutation_indices) + : EnableLinOp(exec, + dim<2>{scaling_factors.get_num_elems(), + scaling_factors.get_num_elems()}), + scale_{exec, std::move(scaling_factors)}, + permutation_{exec, std::move(permutation_indices)} +{ + GKO_ASSERT_EQ(scale_.get_num_elems(), permutation_.get_num_elems()); +} + + +template +std::unique_ptr> +ScaledPermutation::invert() const +{ + const auto exec = this->get_executor(); + const auto size = this->get_size()[0]; + array inv_permutation{exec, size}; + array inv_scale{exec, size}; + exec->run(scaled_permutation::make_invert( + this->get_const_permutation(), this->get_const_scale(), size, + inv_permutation.get_data(), inv_scale.get_data())); + return ScaledPermutation::create(exec, std::move(inv_scale), + std::move(inv_permutation)); +} + + +template +void ScaledPermutation::apply_impl(const LinOp* b, + LinOp* x) const +{ + precision_dispatch_real_complex( + [this](auto dense_b, auto dense_x) { + dense_b->scale_permute(this, dense_x, permute_mode::rows); + }, + b, x); +} + + +template +void ScaledPermutation::apply_impl(const LinOp* alpha, + const LinOp* b, + const LinOp* beta, + LinOp* x) const +{ + precision_dispatch_real_complex( + [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { + auto x_clone = dense_x->clone(); + dense_b->scale_permute(this, x_clone, permute_mode::rows); + dense_x->scale(dense_beta); + dense_x->add_scaled(dense_alpha, x_clone); + }, + alpha, b, beta, x); +} + + +template +void ScaledPermutation::write( + gko::matrix_data& data) const +{ + const auto host_this = + make_temporary_clone(this->get_executor()->get_master(), this); + data.size = this->get_size(); + data.nonzeros.clear(); + data.nonzeros.reserve(data.size[0]); + for (IndexType row = 0; row < this->get_size()[0]; row++) { + data.nonzeros.emplace_back(row, host_this->get_const_permutation()[row], + host_this->get_const_scale()[row]); + } +} + + +#define GKO_DECLARE_SCALED_PERMUTATION_MATRIX(ValueType, IndexType) \ + class ScaledPermutation +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SCALED_PERMUTATION_MATRIX); + + +} // namespace matrix +} // namespace gko diff --git a/core/matrix/scaled_permutation_kernels.hpp b/core/matrix/scaled_permutation_kernels.hpp new file mode 100644 index 00000000000..905321ea885 --- /dev/null +++ b/core/matrix/scaled_permutation_kernels.hpp @@ -0,0 +1,68 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
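// Illustrative sketch of the ScaledPermutation class implemented above; a
// minimal usage example, not part of the diff. Its apply_impl forwards to
// Dense::scale_permute with permute_mode::rows, so applying it to a Dense
// vector scale-permutes that vector's rows; invert() is assumed to return the
// inverse permutation together with correspondingly inverted scaling factors.
#include <ginkgo/ginkgo.h>

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    auto sp = gko::matrix::ScaledPermutation<double, gko::int32>::create(
        exec, gko::array<double>{exec, {2.0, 0.5, 1.0}},
        gko::array<gko::int32>{exec, {1, 0, 2}});
    auto x = gko::initialize<gko::matrix::Dense<double>>({1.0, 2.0, 3.0}, exec);
    auto y = gko::matrix::Dense<double>::create(exec, x->get_size());
    sp->apply(x, y);             // y = scaled, permuted copy of x
    auto sp_inv = sp->invert();  // inverse operator as a ScaledPermutation
}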
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_SCALED_PERMUTATION_KERNELS_HPP_ +#define GKO_CORE_MATRIX_SCALED_PERMUTATION_KERNELS_HPP_ + +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL(ValueType, IndexType) \ + void invert(std::shared_ptr exec, \ + const IndexType* input_permutation, \ + const ValueType* input_scale, size_type size, \ + IndexType* output_permutation, ValueType* output_scale) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL(ValueType, IndexType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(scaled_permutation, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_SCALED_PERMUTATION_KERNELS_HPP_ diff --git a/core/test/matrix/permutation.cpp b/core/test/matrix/permutation.cpp index 09ef5e4701a..166ff0cbcdb 100644 --- a/core/test/matrix/permutation.cpp +++ b/core/test/matrix/permutation.cpp @@ -51,39 +51,40 @@ namespace { template class Permutation : public ::testing::Test { protected: - using v_type = + using value_type = typename std::tuple_element<0, decltype(ValueIndexType())>::type; - using i_type = + using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; - using Vec = gko::matrix::Dense; - using Csr = gko::matrix::Csr; + using Vec = gko::matrix::Dense; + using Csr = gko::matrix::Csr; Permutation() : exec(gko::ReferenceExecutor::create()), - mtx(gko::matrix::Permutation::create( - exec, gko::dim<2>{4, 3}, gko::array{exec, {1, 0, 2, 3}})) + mtx(gko::matrix::Permutation::create( + exec, gko::dim<2>{4, 3}, + gko::array{exec, {1, 0, 2, 3}})) {} static void assert_equal_to_original_mtx( - gko::ptr_param> m) + gko::ptr_param> m) { auto perm = m->get_permutation(); ASSERT_EQ(m->get_size(), gko::dim<2>(4, 3)); - ASSERT_EQ(m->get_permutation_size(), 4); + ASSERT_EQ(m->get_size()[0], 4); ASSERT_EQ(perm[0], 1); ASSERT_EQ(perm[1], 0); ASSERT_EQ(perm[2], 2); ASSERT_EQ(perm[3], 3); } - static void assert_empty(gko::matrix::Permutation* m) + static void assert_empty(gko::matrix::Permutation* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); - ASSERT_EQ(m->get_permutation_size(), 0); + 
ASSERT_EQ(m->get_size()[0], 0); } std::shared_ptr exec; - std::unique_ptr> mtx; + std::unique_ptr> mtx; }; TYPED_TEST_SUITE(Permutation, gko::test::ValueIndexTypes, @@ -92,8 +93,8 @@ TYPED_TEST_SUITE(Permutation, gko::test::ValueIndexTypes, TYPED_TEST(Permutation, CanBeEmpty) { - using i_type = typename TestFixture::i_type; - auto empty = gko::matrix::Permutation::create(this->exec); + using index_type = typename TestFixture::index_type; + auto empty = gko::matrix::Permutation::create(this->exec); this->assert_empty(empty.get()); } @@ -101,8 +102,8 @@ TYPED_TEST(Permutation, CanBeEmpty) TYPED_TEST(Permutation, ReturnsNullValuesArrayWhenEmpty) { - using i_type = typename TestFixture::i_type; - auto empty = gko::matrix::Permutation::create(this->exec); + using index_type = typename TestFixture::index_type; + auto empty = gko::matrix::Permutation::create(this->exec); ASSERT_EQ(empty->get_const_permutation(), nullptr); } @@ -110,19 +111,19 @@ TYPED_TEST(Permutation, ReturnsNullValuesArrayWhenEmpty) TYPED_TEST(Permutation, CanBeConstructedWithSize) { - using i_type = typename TestFixture::i_type; - auto m = - gko::matrix::Permutation::create(this->exec, gko::dim<2>{2, 3}); + using index_type = typename TestFixture::index_type; + auto m = gko::matrix::Permutation::create(this->exec, + gko::dim<2>{2, 3}); ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_permutation_size(), 2); + ASSERT_EQ(m->get_size()[0], 2); } TYPED_TEST(Permutation, FactorySetsCorrectPermuteMask) { - using i_type = typename TestFixture::i_type; - auto m = gko::matrix::Permutation::create(this->exec); + using index_type = typename TestFixture::index_type; + auto m = gko::matrix::Permutation::create(this->exec); auto mask = m->get_permute_mask(); ASSERT_EQ(mask, gko::matrix::row_permute); @@ -131,10 +132,10 @@ TYPED_TEST(Permutation, FactorySetsCorrectPermuteMask) TYPED_TEST(Permutation, PermutationCanBeConstructedFromExistingData) { - using i_type = typename TestFixture::i_type; - i_type data[] = {1, 0, 2}; + using index_type = typename TestFixture::index_type; + index_type data[] = {1, 0, 2}; - auto m = gko::matrix::Permutation::create( + auto m = gko::matrix::Permutation::create( this->exec, gko::dim<2>{3, 5}, gko::make_array_view(this->exec, 3, data)); @@ -144,12 +145,12 @@ TYPED_TEST(Permutation, PermutationCanBeConstructedFromExistingData) TYPED_TEST(Permutation, PermutationCanBeConstructedFromExistingConstData) { - using i_type = typename TestFixture::i_type; - using i_type = typename TestFixture::i_type; - const i_type data[] = {1, 0, 2}; + using index_type = typename TestFixture::index_type; + using index_type = typename TestFixture::index_type; + const index_type data[] = {1, 0, 2}; - auto m = gko::matrix::Permutation::create_const( - this->exec, 3, gko::array::const_view(this->exec, 3, data)); + auto m = gko::matrix::Permutation::create_const( + this->exec, 3, gko::array::const_view(this->exec, 3, data)); ASSERT_EQ(m->get_const_permutation(), data); } @@ -157,20 +158,20 @@ TYPED_TEST(Permutation, PermutationCanBeConstructedFromExistingConstData) TYPED_TEST(Permutation, CanBeConstructedWithSizeAndMask) { - using i_type = typename TestFixture::i_type; - auto m = gko::matrix::Permutation::create( + using index_type = typename TestFixture::index_type; + auto m = gko::matrix::Permutation::create( this->exec, gko::dim<2>{2, 3}, gko::matrix::column_permute); ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_permutation_size(), 2); + ASSERT_EQ(m->get_size()[0], 2); ASSERT_EQ(m->get_permute_mask(), 
gko::matrix::column_permute); } TYPED_TEST(Permutation, CanExplicitlyOverrideSetPermuteMask) { - using i_type = typename TestFixture::i_type; - auto m = gko::matrix::Permutation::create( + using index_type = typename TestFixture::index_type; + auto m = gko::matrix::Permutation::create( this->exec, gko::dim<2>{2, 3}, gko::matrix::column_permute); auto mask = m->get_permute_mask(); @@ -186,10 +187,10 @@ TYPED_TEST(Permutation, CanExplicitlyOverrideSetPermuteMask) TYPED_TEST(Permutation, PermutationThrowsforWrongRowPermDimensions) { - using i_type = typename TestFixture::i_type; - i_type data[] = {0, 2, 1}; + using index_type = typename TestFixture::index_type; + index_type data[] = {0, 2, 1}; - ASSERT_THROW(gko::matrix::Permutation::create( + ASSERT_THROW(gko::matrix::Permutation::create( this->exec, gko::dim<2>{4, 2}, gko::make_array_view(this->exec, 3, data)), gko::ValueMismatch); @@ -198,10 +199,10 @@ TYPED_TEST(Permutation, PermutationThrowsforWrongRowPermDimensions) TYPED_TEST(Permutation, SettingMaskDoesNotModifyData) { - using i_type = typename TestFixture::i_type; - i_type data[] = {1, 0, 2}; + using index_type = typename TestFixture::index_type; + index_type data[] = {1, 0, 2}; - auto m = gko::matrix::Permutation::create( + auto m = gko::matrix::Permutation::create( this->exec, gko::dim<2>{3, 5}, gko::make_array_view(this->exec, 3, data)); @@ -220,10 +221,10 @@ TYPED_TEST(Permutation, SettingMaskDoesNotModifyData) TYPED_TEST(Permutation, PermutationThrowsforWrongColPermDimensions) { - using i_type = typename TestFixture::i_type; - i_type data[] = {0, 2, 1}; + using index_type = typename TestFixture::index_type; + index_type data[] = {0, 2, 1}; - ASSERT_THROW(gko::matrix::Permutation::create( + ASSERT_THROW(gko::matrix::Permutation::create( this->exec, gko::dim<2>{3, 4}, gko::make_array_view(this->exec, 3, data), gko::matrix::column_permute), @@ -239,8 +240,8 @@ TYPED_TEST(Permutation, KnowsItsSizeAndValues) TYPED_TEST(Permutation, CanBeCopied) { - using i_type = typename TestFixture::i_type; - auto mtx_copy = gko::matrix::Permutation::create(this->exec); + using index_type = typename TestFixture::index_type; + auto mtx_copy = gko::matrix::Permutation::create(this->exec); mtx_copy->copy_from(this->mtx); @@ -252,8 +253,8 @@ TYPED_TEST(Permutation, CanBeCopied) TYPED_TEST(Permutation, CanBeMoved) { - using i_type = typename TestFixture::i_type; - auto mtx_copy = gko::matrix::Permutation::create(this->exec); + using index_type = typename TestFixture::index_type; + auto mtx_copy = gko::matrix::Permutation::create(this->exec); mtx_copy->move_from(this->mtx); @@ -263,8 +264,8 @@ TYPED_TEST(Permutation, CanBeMoved) TYPED_TEST(Permutation, CopyingPreservesMask) { - using i_type = typename TestFixture::i_type; - auto mtx_copy = gko::matrix::Permutation::create(this->exec); + using index_type = typename TestFixture::index_type; + auto mtx_copy = gko::matrix::Permutation::create(this->exec); mtx_copy->copy_from(this->mtx); diff --git a/cuda/matrix/csr_kernels.instantiate.cu b/cuda/matrix/csr_kernels.instantiate.cu index 75747bf074b..335d42d2ff9 100644 --- a/cuda/matrix/csr_kernels.instantiate.cu +++ b/cuda/matrix/csr_kernels.instantiate.cu @@ -69,12 +69,22 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_NONSYMM_PERMUTE_KERNEL); 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_NONSYMM_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ROW_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_ROW_SCALE_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/cuda/matrix/csr_kernels.template.cu b/cuda/matrix/csr_kernels.template.cu index 803cb530262..d5b577a6068 100644 --- a/cuda/matrix/csr_kernels.template.cu +++ b/cuda/matrix/csr_kernels.template.cu @@ -124,7 +124,7 @@ namespace { template void merge_path_spmv(syn::value_list, - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c, @@ -204,7 +204,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_merge_path_spmv, merge_path_spmv); template -int compute_items_per_thread(std::shared_ptr exec) +int compute_items_per_thread(std::shared_ptr exec) { const int version = (exec->get_major_version() << 4) + exec->get_minor_version(); @@ -245,7 +245,7 @@ int compute_items_per_thread(std::shared_ptr exec) template void classical_spmv(syn::value_list, - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c, @@ -298,7 +298,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); template -void load_balance_spmv(std::shared_ptr exec, +void load_balance_spmv(std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c, @@ -349,7 +349,7 @@ void load_balance_spmv(std::shared_ptr exec, template -bool try_general_sparselib_spmv(std::shared_ptr exec, +bool try_general_sparselib_spmv(std::shared_ptr exec, const ValueType* alpha, const matrix::Csr* a, const matrix::Dense* b, @@ -441,7 +441,7 @@ template ::value || !std::is_same::value>> -bool try_sparselib_spmv(std::shared_ptr exec, +bool try_sparselib_spmv(std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c, @@ -453,7 +453,7 @@ bool try_sparselib_spmv(std::shared_ptr exec, } template -bool try_sparselib_spmv(std::shared_ptr exec, +bool try_sparselib_spmv(std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c, @@ -479,7 +479,7 @@ bool try_sparselib_spmv(std::shared_ptr exec, template -void spmv(std::shared_ptr exec, +void spmv(std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c) @@ -536,7 +536,7 @@ void spmv(std::shared_ptr exec, template -void advanced_spmv(std::shared_ptr exec, +void advanced_spmv(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Csr* a, const matrix::Dense* b, @@ -597,7 +597,7 @@ void advanced_spmv(std::shared_ptr exec, template -void spgemm(std::shared_ptr exec, +void spgemm(std::shared_ptr exec, const matrix::Csr* a, const matrix::Csr* b, matrix::Csr* c) @@ -719,56 +719,8 @@ void spgemm(std::shared_ptr exec, } -namespace { - - -template -void spgeam(syn::value_list, - std::shared_ptr exec, const ValueType* alpha, - const IndexType* a_row_ptrs, 
const IndexType* a_col_idxs, - const ValueType* a_vals, const ValueType* beta, - const IndexType* b_row_ptrs, const IndexType* b_col_idxs, - const ValueType* b_vals, matrix::Csr* c) -{ - auto m = static_cast(c->get_size()[0]); - auto c_row_ptrs = c->get_row_ptrs(); - // count nnz for alpha * A + beta * B - auto subwarps_per_block = default_block_size / subwarp_size; - auto num_blocks = ceildiv(m, subwarps_per_block); - if (num_blocks > 0) { - kernel::spgeam_nnz - <<get_stream()>>>( - a_row_ptrs, a_col_idxs, b_row_ptrs, b_col_idxs, m, c_row_ptrs); - } - - // build row pointers - components::prefix_sum_nonnegative(exec, c_row_ptrs, m + 1); - - // accumulate non-zeros for alpha * A + beta * B - matrix::CsrBuilder c_builder{c}; - auto c_nnz = exec->copy_val_to_host(c_row_ptrs + m); - c_builder.get_col_idx_array().resize_and_reset(c_nnz); - c_builder.get_value_array().resize_and_reset(c_nnz); - auto c_col_idxs = c->get_col_idxs(); - auto c_vals = c->get_values(); - if (num_blocks > 0) { - kernel::spgeam - <<get_stream()>>>( - as_device_type(alpha), a_row_ptrs, a_col_idxs, - as_device_type(a_vals), as_device_type(beta), b_row_ptrs, - b_col_idxs, as_device_type(b_vals), m, c_row_ptrs, c_col_idxs, - as_device_type(c_vals)); - } -} - -GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam); - - -} // namespace - - template -void advanced_spgemm(std::shared_ptr exec, +void advanced_spgemm(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Csr* a, const matrix::Csr* b, @@ -914,54 +866,7 @@ void advanced_spgemm(std::shared_ptr exec, template -void spgeam(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Csr* a, - const matrix::Dense* beta, - const matrix::Csr* b, - matrix::Csr* c) -{ - auto total_nnz = - a->get_num_stored_elements() + b->get_num_stored_elements(); - auto nnz_per_row = total_nnz / a->get_size()[0]; - select_spgeam( - spgeam_kernels(), - [&](int compiled_subwarp_size) { - return compiled_subwarp_size >= nnz_per_row || - compiled_subwarp_size == config::warp_size; - }, - syn::value_list(), syn::type_list<>(), exec, - alpha->get_const_values(), a->get_const_row_ptrs(), - a->get_const_col_idxs(), a->get_const_values(), - beta->get_const_values(), b->get_const_row_ptrs(), - b->get_const_col_idxs(), b->get_const_values(), c); -} - - -template -void fill_in_dense(std::shared_ptr exec, - const matrix::Csr* source, - matrix::Dense* result) -{ - const auto num_rows = result->get_size()[0]; - const auto num_cols = result->get_size()[1]; - const auto stride = result->get_stride(); - const auto row_ptrs = source->get_const_row_ptrs(); - const auto col_idxs = source->get_const_col_idxs(); - const auto vals = source->get_const_values(); - - auto grid_dim = ceildiv(num_rows, default_block_size); - if (grid_dim > 0) { - kernel::fill_in_dense<<get_stream()>>>( - num_rows, as_device_type(row_ptrs), as_device_type(col_idxs), - as_device_type(vals), stride, as_device_type(result->get_values())); - } -} - - -template -void transpose(std::shared_ptr exec, +void transpose(std::shared_ptr exec, const matrix::Csr* orig, matrix::Csr* trans) { @@ -1010,7 +915,7 @@ void transpose(std::shared_ptr exec, template -void conj_transpose(std::shared_ptr exec, +void conj_transpose(std::shared_ptr exec, const matrix::Csr* orig, matrix::Csr* trans) { @@ -1067,160 +972,7 @@ void conj_transpose(std::shared_ptr exec, template -void inv_symm_permute(std::shared_ptr exec, - const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* permuted) -{ - auto num_rows = orig->get_size()[0]; - auto 
count_num_blocks = ceildiv(num_rows, default_block_size); - if (count_num_blocks > 0) { - kernel::inv_row_ptr_permute<<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - permuted->get_row_ptrs()); - } - components::prefix_sum_nonnegative(exec, permuted->get_row_ptrs(), - num_rows + 1); - auto copy_num_blocks = - ceildiv(num_rows, default_block_size / config::warp_size); - if (copy_num_blocks > 0) { - kernel::inv_symm_permute - <<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - orig->get_const_col_idxs(), - as_device_type(orig->get_const_values()), - permuted->get_row_ptrs(), permuted->get_col_idxs(), - as_device_type(permuted->get_values())); - } -} - - -template -void row_permute(std::shared_ptr exec, - const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* row_permuted) -{ - auto num_rows = orig->get_size()[0]; - auto count_num_blocks = ceildiv(num_rows, default_block_size); - if (count_num_blocks > 0) { - kernel::row_ptr_permute<<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - row_permuted->get_row_ptrs()); - } - components::prefix_sum_nonnegative(exec, row_permuted->get_row_ptrs(), - num_rows + 1); - auto copy_num_blocks = - ceildiv(num_rows, default_block_size / config::warp_size); - if (copy_num_blocks > 0) { - kernel::row_permute - <<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - orig->get_const_col_idxs(), - as_device_type(orig->get_const_values()), - row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), - as_device_type(row_permuted->get_values())); - } -} - - -template -void inverse_row_permute(std::shared_ptr exec, - const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* row_permuted) -{ - auto num_rows = orig->get_size()[0]; - auto count_num_blocks = ceildiv(num_rows, default_block_size); - if (count_num_blocks > 0) { - kernel::inv_row_ptr_permute<<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - row_permuted->get_row_ptrs()); - } - components::prefix_sum_nonnegative(exec, row_permuted->get_row_ptrs(), - num_rows + 1); - auto copy_num_blocks = - ceildiv(num_rows, default_block_size / config::warp_size); - if (copy_num_blocks > 0) { - kernel::inv_row_permute - <<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - orig->get_const_col_idxs(), - as_device_type(orig->get_const_values()), - row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), - as_device_type(row_permuted->get_values())); - } -} - - -template -void calculate_nonzeros_per_row_in_span( - std::shared_ptr exec, - const matrix::Csr* source, const span& row_span, - const span& col_span, array* row_nnz) -{ - const auto num_rows = source->get_size()[0]; - auto row_ptrs = source->get_const_row_ptrs(); - auto col_idxs = source->get_const_col_idxs(); - auto grid_dim = ceildiv(row_span.length(), default_block_size); - if (grid_dim > 0) { - kernel::calculate_nnz_per_row_in_span<<get_stream()>>>( - row_span, col_span, as_device_type(row_ptrs), - as_device_type(col_idxs), as_device_type(row_nnz->get_data())); - } -} - - -template -void compute_submatrix(std::shared_ptr exec, - const matrix::Csr* source, - gko::span row_span, gko::span col_span, - matrix::Csr* result) -{ - auto row_offset = row_span.begin; - auto col_offset = col_span.begin; - auto num_rows = result->get_size()[0]; - auto num_cols = result->get_size()[1]; - auto row_ptrs = source->get_const_row_ptrs(); - auto grid_dim = ceildiv(num_rows, default_block_size); - if (grid_dim > 0) { - kernel::compute_submatrix_idxs_and_vals<<get_stream()>>>( - 
num_rows, num_cols, row_offset, col_offset, - as_device_type(source->get_const_row_ptrs()), - as_device_type(source->get_const_col_idxs()), - as_device_type(source->get_const_values()), - as_device_type(result->get_const_row_ptrs()), - as_device_type(result->get_col_idxs()), - as_device_type(result->get_values())); - } -} - - -template -void calculate_nonzeros_per_row_in_index_set( - std::shared_ptr exec, - const matrix::Csr* source, - const gko::index_set& row_index_set, - const gko::index_set& col_index_set, - IndexType* row_nnz) GKO_NOT_IMPLEMENTED; - - -template -void compute_submatrix_from_index_set( - std::shared_ptr exec, - const matrix::Csr* source, - const gko::index_set& row_index_set, - const gko::index_set& col_index_set, - matrix::Csr* result) GKO_NOT_IMPLEMENTED; - - -template -void sort_by_column_index(std::shared_ptr exec, +void sort_by_column_index(std::shared_ptr exec, matrix::Csr* to_sort) { if (cusparse::is_supported::value) { @@ -1271,95 +1023,6 @@ void sort_by_column_index(std::shared_ptr exec, } -template -void is_sorted_by_column_index( - std::shared_ptr exec, - const matrix::Csr* to_check, bool* is_sorted) -{ - *is_sorted = true; - auto cpu_array = make_array_view(exec->get_master(), 1, is_sorted); - auto gpu_array = array{exec, cpu_array}; - auto block_size = default_block_size; - auto num_rows = static_cast(to_check->get_size()[0]); - auto num_blocks = ceildiv(num_rows, block_size); - if (num_blocks > 0) { - kernel:: - check_unsorted<<get_stream()>>>( - to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(), - num_rows, gpu_array.get_data()); - } - cpu_array = gpu_array; -} - - -template -void extract_diagonal(std::shared_ptr exec, - const matrix::Csr* orig, - matrix::Diagonal* diag) -{ - const auto nnz = orig->get_num_stored_elements(); - const auto diag_size = diag->get_size()[0]; - const auto num_blocks = - ceildiv(config::warp_size * diag_size, default_block_size); - - const auto orig_values = orig->get_const_values(); - const auto orig_row_ptrs = orig->get_const_row_ptrs(); - const auto orig_col_idxs = orig->get_const_col_idxs(); - auto diag_values = diag->get_values(); - - if (num_blocks > 0) { - kernel::extract_diagonal<<get_stream()>>>( - diag_size, nnz, as_device_type(orig_values), - as_device_type(orig_row_ptrs), as_device_type(orig_col_idxs), - as_device_type(diag_values)); - } -} - - -template -void check_diagonal_entries_exist( - std::shared_ptr exec, - const matrix::Csr* const mtx, bool& has_all_diags) -{ - const auto num_diag = static_cast( - std::min(mtx->get_size()[0], mtx->get_size()[1])); - if (num_diag > 0) { - const IndexType num_blocks = - ceildiv(num_diag, default_block_size / config::warp_size); - array has_diags(exec, {true}); - kernel::check_diagonal_entries<<get_stream()>>>( - num_diag, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - has_diags.get_data()); - has_all_diags = exec->copy_val_to_host(has_diags.get_const_data()); - } else { - has_all_diags = true; - } -} - - -template -void add_scaled_identity(std::shared_ptr exec, - const matrix::Dense* const alpha, - const matrix::Dense* const beta, - matrix::Csr* const mtx) -{ - const auto nrows = mtx->get_size()[0]; - if (nrows == 0) { - return; - } - const auto nthreads = nrows * config::warp_size; - const auto nblocks = ceildiv(nthreads, default_block_size); - kernel::add_scaled_identity<<get_stream()>>>( - as_device_type(alpha->get_const_values()), - as_device_type(beta->get_const_values()), static_cast(nrows), - mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - 
as_device_type(mtx->get_values())); -} - - } // namespace csr } // namespace cuda } // namespace kernels diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 46e8894fdac..f05692c1929 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1102,6 +1102,35 @@ void inv_symm_permute_kernel(size_type num_rows, } } + +template +void inv_nonsymm_permute_kernel(size_type num_rows, + const IndexType* __restrict__ row_permutation, + const IndexType* __restrict__ col_permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, + ValueType* __restrict__ out_vals, + sycl::nd_item<3> item_ct1) +{ + auto tid = thread::get_subwarp_id_flat(item_ct1); + if (tid >= num_rows) { + return; + } + auto lane = item_ct1.get_local_id(2) % subgroup_size; + auto in_row = tid; + auto out_row = row_permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subgroup_size) { + out_cols[out_begin + i] = col_permutation[in_cols[in_begin + i]]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } +} + template void inv_symm_permute_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, @@ -1122,6 +1151,25 @@ void inv_symm_permute_kernel(dim3 grid, dim3 block, }); } +template +void inv_nonsymm_permute_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, + size_type num_rows, const IndexType* row_permutation, + const IndexType* col_permutation, const IndexType* in_row_ptrs, + const IndexType* in_cols, const ValueType* in_vals, + const IndexType* out_row_ptrs, IndexType* out_cols, ValueType* out_vals) +{ + queue->submit([&](sycl::handler& cgh) { + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) { + inv_nonsymm_permute_kernel( + num_rows, row_permutation, col_permutation, + in_row_ptrs, in_cols, in_vals, out_row_ptrs, + out_cols, out_vals, item_ct1); + }); + }); +} + namespace host_kernel { @@ -2266,6 +2314,33 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); +template +void inv_nonsymm_permute(std::shared_ptr exec, + const IndexType* row_perm, const IndexType* col_perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + inv_row_ptr_permute_kernel( + count_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + row_perm, orig->get_const_row_ptrs(), permuted->get_row_ptrs()); + components::prefix_sum_nonnegative(exec, permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + inv_symm_permute_kernel( + copy_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + row_perm, col_perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), orig->get_const_values(), + permuted->get_row_ptrs(), permuted->get_col_idxs(), + permuted->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_NONSYMM_PERMUTE_KERNEL); + + template void row_permute(std::shared_ptr exec, const IndexType* perm, @@ -2293,10 +2368,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void inverse_row_permute(std::shared_ptr exec, - const IndexType* 
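A note on the work distribution in the new dpcpp inv_nonsymm_permute_kernel above: one subgroup is assigned to each CSR row, and its lanes stride over that row's nonzeros. The following host-side sketch is illustrative only and not part of the patch; subgroup_size and row_nnz are made-up values, and it merely prints which entry indices each lane would copy:

    #include <cstdio>

    int main()
    {
        const int subgroup_size = 4;  // assumed subgroup width
        const int row_nnz = 10;       // assumed number of entries in one row
        for (int lane = 0; lane < subgroup_size; ++lane) {
            std::printf("lane %d:", lane);
            // same lane-strided loop shape as in the kernel above
            for (int i = lane; i < row_nnz; i += subgroup_size) {
                std::printf(" %d", i);
            }
            std::printf("\n");
        }
        // prints: lane 0: 0 4 8, lane 1: 1 5 9, lane 2: 2 6, lane 3: 3 7
    }
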
perm, - const matrix::Csr* orig, - matrix::Csr* row_permuted) +void inv_row_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto num_rows = orig->get_size()[0]; auto count_num_blocks = ceildiv(num_rows, default_block_size); @@ -2315,7 +2390,7 @@ void inverse_row_permute(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_ROW_PERMUTE_KERNEL); template diff --git a/examples/distributed-solver/distributed-solver.cpp b/examples/distributed-solver/distributed-solver.cpp index 1b758d186a4..7eafba783ee 100644 --- a/examples/distributed-solver/distributed-solver.cpp +++ b/examples/distributed-solver/distributed-solver.cpp @@ -119,15 +119,14 @@ int main(int argc, char* argv[]) int device_id = gko::experimental::mpi::map_rank_to_device_id( comm, gko::CudaExecutor::get_num_devices()); return gko::CudaExecutor::create( - device_id, gko::ReferenceExecutor::create(), false, - gko::allocation_mode::device); + device_id, gko::ReferenceExecutor::create()); }}, {"hip", [](MPI_Comm comm) { int device_id = gko::experimental::mpi::map_rank_to_device_id( comm, gko::HipExecutor::get_num_devices()); return gko::HipExecutor::create( - device_id, gko::ReferenceExecutor::create(), true); + device_id, gko::ReferenceExecutor::create()); }}, {"dpcpp", [](MPI_Comm comm) { int device_id = 0; diff --git a/hip/matrix/csr_kernels.instantiate.hip.cpp b/hip/matrix/csr_kernels.instantiate.hip.cpp index 9a6c29206de..156b170311f 100644 --- a/hip/matrix/csr_kernels.instantiate.hip.cpp +++ b/hip/matrix/csr_kernels.instantiate.hip.cpp @@ -117,12 +117,22 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_NONSYMM_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_NONSYMM_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ROW_SCALE_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_ROW_SCALE_PERMUTE_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/matrix/csr_kernels.template.hip.cpp b/hip/matrix/csr_kernels.template.hip.cpp index 5e4de7b9699..52101385c92 100644 --- a/hip/matrix/csr_kernels.template.hip.cpp +++ b/hip/matrix/csr_kernels.template.hip.cpp @@ -629,54 +629,6 @@ void spgemm(std::shared_ptr exec, } -namespace { - - -template -void spgeam(syn::value_list, - std::shared_ptr exec, const ValueType* alpha, - const IndexType* a_row_ptrs, const IndexType* a_col_idxs, - const ValueType* a_vals, const ValueType* beta, - const IndexType* b_row_ptrs, const IndexType* b_col_idxs, - const ValueType* b_vals, matrix::Csr* c) -{ - auto m = static_cast(c->get_size()[0]); - auto c_row_ptrs = c->get_row_ptrs(); - 
// count nnz for alpha * A + beta * B - auto subwarps_per_block = default_block_size / subwarp_size; - auto num_blocks = ceildiv(m, subwarps_per_block); - if (num_blocks > 0) { - kernel::spgeam_nnz - <<get_stream()>>>( - a_row_ptrs, a_col_idxs, b_row_ptrs, b_col_idxs, m, c_row_ptrs); - } - - // build row pointers - components::prefix_sum_nonnegative(exec, c_row_ptrs, m + 1); - - // accumulate non-zeros for alpha * A + beta * B - matrix::CsrBuilder c_builder{c}; - auto c_nnz = exec->copy_val_to_host(c_row_ptrs + m); - c_builder.get_col_idx_array().resize_and_reset(c_nnz); - c_builder.get_value_array().resize_and_reset(c_nnz); - auto c_col_idxs = c->get_col_idxs(); - auto c_vals = c->get_values(); - if (num_blocks > 0) { - kernel::spgeam - <<get_stream()>>>( - as_device_type(alpha), a_row_ptrs, a_col_idxs, - as_device_type(a_vals), as_device_type(beta), b_row_ptrs, - b_col_idxs, as_device_type(b_vals), m, c_row_ptrs, c_col_idxs, - as_device_type(c_vals)); - } -} - -GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam); - - -} // namespace - - template void advanced_spgemm(std::shared_ptr exec, const matrix::Dense* alpha, @@ -768,53 +720,6 @@ void advanced_spgemm(std::shared_ptr exec, } -template -void spgeam(std::shared_ptr exec, - const matrix::Dense* alpha, - const matrix::Csr* a, - const matrix::Dense* beta, - const matrix::Csr* b, - matrix::Csr* c) -{ - auto total_nnz = - a->get_num_stored_elements() + b->get_num_stored_elements(); - auto nnz_per_row = total_nnz / a->get_size()[0]; - select_spgeam( - spgeam_kernels(), - [&](int compiled_subwarp_size) { - return compiled_subwarp_size >= nnz_per_row || - compiled_subwarp_size == config::warp_size; - }, - syn::value_list(), syn::type_list<>(), exec, - alpha->get_const_values(), a->get_const_row_ptrs(), - a->get_const_col_idxs(), a->get_const_values(), - beta->get_const_values(), b->get_const_row_ptrs(), - b->get_const_col_idxs(), b->get_const_values(), c); -} - - -template -void fill_in_dense(std::shared_ptr exec, - const matrix::Csr* source, - matrix::Dense* result) -{ - const auto num_rows = result->get_size()[0]; - const auto num_cols = result->get_size()[1]; - const auto stride = result->get_stride(); - const auto row_ptrs = source->get_const_row_ptrs(); - const auto col_idxs = source->get_const_col_idxs(); - const auto vals = source->get_const_values(); - - auto grid_dim = ceildiv(num_rows, default_block_size); - if (grid_dim > 0) { - kernel::fill_in_dense<<get_stream()>>>( - num_rows, as_device_type(row_ptrs), as_device_type(col_idxs), - as_device_type(vals), stride, as_device_type(result->get_values())); - } -} - - template void transpose(std::shared_ptr exec, const matrix::Csr* orig, @@ -871,159 +776,6 @@ void conj_transpose(std::shared_ptr exec, } -template -void inv_symm_permute(std::shared_ptr exec, - const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* permuted) -{ - auto num_rows = orig->get_size()[0]; - auto count_num_blocks = ceildiv(num_rows, default_block_size); - if (count_num_blocks > 0) { - kernel::inv_row_ptr_permute<<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - permuted->get_row_ptrs()); - } - components::prefix_sum_nonnegative(exec, permuted->get_row_ptrs(), - num_rows + 1); - auto copy_num_blocks = - ceildiv(num_rows, default_block_size / config::warp_size); - if (copy_num_blocks > 0) { - kernel::inv_symm_permute - <<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - orig->get_const_col_idxs(), - as_device_type(orig->get_const_values()), - permuted->get_row_ptrs(), 
permuted->get_col_idxs(), - as_device_type(permuted->get_values())); - } -} - - -template -void row_permute(std::shared_ptr exec, const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* row_permuted) -{ - auto num_rows = orig->get_size()[0]; - auto count_num_blocks = ceildiv(num_rows, default_block_size); - if (count_num_blocks > 0) { - kernel::row_ptr_permute<<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - row_permuted->get_row_ptrs()); - } - components::prefix_sum_nonnegative(exec, row_permuted->get_row_ptrs(), - num_rows + 1); - auto copy_num_blocks = - ceildiv(num_rows, default_block_size / config::warp_size); - if (copy_num_blocks > 0) { - kernel::row_permute - <<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - orig->get_const_col_idxs(), - as_device_type(orig->get_const_values()), - row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), - as_device_type(row_permuted->get_values())); - } -} - - -template -void inverse_row_permute(std::shared_ptr exec, - const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* row_permuted) -{ - auto num_rows = orig->get_size()[0]; - auto count_num_blocks = ceildiv(num_rows, default_block_size); - if (count_num_blocks > 0) { - kernel::inv_row_ptr_permute<<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - row_permuted->get_row_ptrs()); - } - components::prefix_sum_nonnegative(exec, row_permuted->get_row_ptrs(), - num_rows + 1); - auto copy_num_blocks = - ceildiv(num_rows, default_block_size / config::warp_size); - if (copy_num_blocks > 0) { - kernel::inv_row_permute - <<get_stream()>>>( - num_rows, perm, orig->get_const_row_ptrs(), - orig->get_const_col_idxs(), - as_device_type(orig->get_const_values()), - row_permuted->get_row_ptrs(), row_permuted->get_col_idxs(), - as_device_type(row_permuted->get_values())); - } -} - - -template -void calculate_nonzeros_per_row_in_span( - std::shared_ptr exec, - const matrix::Csr* source, const span& row_span, - const span& col_span, array* row_nnz) -{ - const auto num_rows = source->get_size()[0]; - auto row_ptrs = source->get_const_row_ptrs(); - auto col_idxs = source->get_const_col_idxs(); - auto grid_dim = ceildiv(row_span.length(), default_block_size); - - if (grid_dim > 0) { - kernel::calculate_nnz_per_row_in_span<<get_stream()>>>( - row_span, col_span, as_device_type(row_ptrs), - as_device_type(col_idxs), as_device_type(row_nnz->get_data())); - } -} - - -template -void compute_submatrix(std::shared_ptr exec, - const matrix::Csr* source, - gko::span row_span, gko::span col_span, - matrix::Csr* result) -{ - auto row_offset = row_span.begin; - auto col_offset = col_span.begin; - auto num_rows = result->get_size()[0]; - auto num_cols = result->get_size()[1]; - auto row_ptrs = source->get_const_row_ptrs(); - auto grid_dim = ceildiv(num_rows, default_block_size); - if (grid_dim > 0) { - kernel::compute_submatrix_idxs_and_vals<<get_stream()>>>( - num_rows, num_cols, row_offset, col_offset, - as_device_type(source->get_const_row_ptrs()), - as_device_type(source->get_const_col_idxs()), - as_device_type(source->get_const_values()), - as_device_type(result->get_const_row_ptrs()), - as_device_type(result->get_col_idxs()), - as_device_type(result->get_values())); - } -} - - -template -void calculate_nonzeros_per_row_in_index_set( - std::shared_ptr exec, - const matrix::Csr* source, - const gko::index_set& row_index_set, - const gko::index_set& col_index_set, - IndexType* row_nnz) GKO_NOT_IMPLEMENTED; - - -template -void compute_submatrix_from_index_set( 
- std::shared_ptr exec, - const matrix::Csr* source, - const gko::index_set& row_index_set, - const gko::index_set& col_index_set, - matrix::Csr* result) GKO_NOT_IMPLEMENTED; - - template void sort_by_column_index(std::shared_ptr exec, matrix::Csr* to_sort) @@ -1069,94 +821,6 @@ void sort_by_column_index(std::shared_ptr exec, } -template -void is_sorted_by_column_index( - std::shared_ptr exec, - const matrix::Csr* to_check, bool* is_sorted) -{ - *is_sorted = true; - auto cpu_array = make_array_view(exec->get_master(), 1, is_sorted); - auto gpu_array = array{exec, cpu_array}; - auto block_size = default_block_size; - auto num_rows = static_cast(to_check->get_size()[0]); - auto num_blocks = ceildiv(num_rows, block_size); - if (num_blocks > 0) { - kernel:: - check_unsorted<<get_stream()>>>( - to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(), - num_rows, gpu_array.get_data()); - } - cpu_array = gpu_array; -} - - -template -void extract_diagonal(std::shared_ptr exec, - const matrix::Csr* orig, - matrix::Diagonal* diag) -{ - const auto nnz = orig->get_num_stored_elements(); - const auto diag_size = diag->get_size()[0]; - const auto num_blocks = - ceildiv(config::warp_size * diag_size, default_block_size); - - const auto orig_values = orig->get_const_values(); - const auto orig_row_ptrs = orig->get_const_row_ptrs(); - const auto orig_col_idxs = orig->get_const_col_idxs(); - auto diag_values = diag->get_values(); - if (num_blocks > 0) { - kernel::extract_diagonal<<get_stream()>>>( - diag_size, nnz, as_device_type(orig_values), - as_device_type(orig_row_ptrs), as_device_type(orig_col_idxs), - as_device_type(diag_values)); - } -} - - -template -void check_diagonal_entries_exist( - std::shared_ptr exec, - const matrix::Csr* const mtx, bool& has_all_diags) -{ - const auto num_diag = static_cast( - std::min(mtx->get_size()[0], mtx->get_size()[1])); - if (num_diag > 0) { - const IndexType num_blocks = - ceildiv(num_diag, default_block_size / config::warp_size); - array has_diags(exec, {true}); - kernel::check_diagonal_entries<<get_stream()>>>( - num_diag, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - has_diags.get_data()); - has_all_diags = exec->copy_val_to_host(has_diags.get_const_data()); - } else { - has_all_diags = true; - } -} - - -template -void add_scaled_identity(std::shared_ptr exec, - const matrix::Dense* const alpha, - const matrix::Dense* const beta, - matrix::Csr* const mtx) -{ - const auto nrows = mtx->get_size()[0]; - if (nrows == 0) { - return; - } - const auto nthreads = nrows * config::warp_size; - const auto nblocks = ceildiv(nthreads, default_block_size); - kernel::add_scaled_identity<<get_stream()>>>( - as_device_type(alpha->get_const_values()), - as_device_type(beta->get_const_values()), static_cast(nrows), - mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(), - as_device_type(mtx->get_values())); -} - - } // namespace csr } // namespace hip } // namespace kernels diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp index 8b270ed7a98..1a52b93c0bd 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -683,6 +683,7 @@ class UnsupportedMatrixProperty : public Error { }; +/** Exception thrown if an object is in an invalid state. */ class InvalidStateError : public Error { public: /** @@ -701,6 +702,25 @@ class InvalidStateError : public Error { }; +/** Exception thrown if an invalid valid was passed to a function. 
*/ +class InvalidValueError : public Error { +public: + /** + * Initializes an invalid value error. + * + * @param file The name of the offending source file + * @param line The source code line number where the error occurred + * @param func The function name where the error occurred + * @param clarification A message describing the invalid value + */ + InvalidValueError(const std::string& file, int line, + const std::string& func, const std::string& clarification) + : Error(file, line, + func + ": Invalid value encountered : " + clarification) + {} +}; + + } // namespace gko diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 834208c4322..b73459c1175 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include namespace gko { @@ -59,9 +60,6 @@ class Ell; template class Hybrid; -template -class Permutation; - template class ScaledPermutation; diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 9c4799951f2..539480934d1 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include namespace gko { @@ -81,9 +82,6 @@ class Fbcsr; template class Hybrid; -template -class Permutation; - template class ScaledPermutation; @@ -492,7 +490,7 @@ class Dense */ void permute(ptr_param> row_permutation, ptr_param> column_permutation, - ptr_param output, bool invert) const; + ptr_param output, bool invert = false) const; /** * @copydoc permute(ptr_param>, ptr_param> row_permutation, ptr_param> column_permutation, - ptr_param output, bool invert) const; + ptr_param output, bool invert = false) const; /** * Creates a scaled and permuted copy of this matrix. @@ -537,7 +535,7 @@ class Dense * @copydoc scale_permute(ptr_param>, ptr_param, permute_mode) */ - std::unique_ptr scale_permute( + void scale_permute( ptr_param> permutation, ptr_param output, permute_mode mode) const; @@ -575,22 +573,22 @@ class Dense * that writes the permuted copy into an existing Dense matrix. * @param output the output matrix. 
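The Dense declarations above change scale_permute from returning a unique_ptr to writing into a caller-provided output matrix, and give the two-permutation overloads an invert parameter that defaults to false. A minimal usage sketch, not part of the patch; the function name permute_into and the double/int32 instantiations are assumptions, and the output matrix is assumed to be allocated with the correct size:

    #include <ginkgo/ginkgo.hpp>

    // Illustrative: out-of-place row/column permutation of a Dense matrix.
    void permute_into(const gko::matrix::Dense<double>* mtx,
                      const gko::matrix::Permutation<gko::int32>* row_perm,
                      const gko::matrix::Permutation<gko::int32>* col_perm,
                      gko::matrix::Dense<double>* out)
    {
        // forward permutation; the new `invert` parameter defaults to false
        mtx->permute(row_perm, col_perm, out);
        // the same permutation pair applied as its inverse
        mtx->permute(row_perm, col_perm, out, true);
    }
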
*/ - std::unique_ptr scale_permute( + void scale_permute( ptr_param> row_permutation, ptr_param> column_permutation, - ptr_param output, bool invert) const; + ptr_param output, bool invert = false) const; /** * @copydoc scale_permute(ptr_param>, ptr_param>, * ptr_param, bool) */ - std::unique_ptr scale_permute( + void scale_permute( ptr_param> row_permutation, ptr_param> column_permutation, - ptr_param output, bool invert) const; + ptr_param output, bool invert = false) const; std::unique_ptr permute( const array* permutation_indices) const override; @@ -1469,19 +1467,24 @@ class Dense } template - void permute_impl(const array* permutation, Dense* output) const; + void permute_impl(const Permutation* permutation, + permute_mode mode, Dense* output) const; template - void inverse_permute_impl(const array* permutation, - Dense* output) const; + void permute_impl(const Permutation* row_permutation, + const Permutation* col_permutation, + bool invert, Dense* output) const; template - void row_permute_impl(const array* permutation, - Dense* output) const; + void scale_permute_impl( + const ScaledPermutation* permutation, + permute_mode mode, Dense* output) const; template - void inverse_row_permute_impl(const array* permutation, - Dense* output) const; + void scale_permute_impl( + const ScaledPermutation* row_permutation, + const ScaledPermutation* column_permutation, + bool invert, Dense* output) const; template void row_gather_impl(const array* row_idxs, @@ -1493,14 +1496,6 @@ class Dense const Dense* beta, Dense* row_collection) const; - template - void column_permute_impl(const array* permutation, - Dense* output) const; - - template - void inverse_column_permute_impl(const array* permutation, - Dense* output) const; - private: array values_; size_type stride_; diff --git a/include/ginkgo/core/matrix/permutation.hpp b/include/ginkgo/core/matrix/permutation.hpp index 163160a2af6..b577481345b 100644 --- a/include/ginkgo/core/matrix/permutation.hpp +++ b/include/ginkgo/core/matrix/permutation.hpp @@ -52,6 +52,78 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { namespace matrix { + +/** Specifies how a permutation will be applied to a matrix. */ +enum class permute_mode { + /** Neither rows nor columns will be permuted. */ + none = 0b0, + /** The rows will be permuted. */ + rows = 0b1, + /** The columns will be permuted. */ + columns = 0b10, + /** + * The rows and columns will be permuted. This is equivalent to + * `permute_mode::rows | permute_mode::columns`. + */ + symmetric = 0b11, + /** The permutation will be inverted before being applied. */ + inverse = 0b100, + /** + * The rows will be permuted using the inverse permutation. This is + * equivalent to `permute_mode::rows | permute_mode::inverse`. + */ + inverse_rows = 0b101, + /** + * The columns will be permuted using the inverse permutation. This is + * equivalent to `permute_mode::columns | permute_mode::inverse`. + */ + inverse_columns = 0b110, + /** + * The rows and columns will be permuted using the inverse permutation. This + * is equivalent to `permute_mode::symmetric | permute_mode::inverse`. + */ + inverse_symmetric = 0b111 +}; + + +/** Combines two permutation modes. */ +inline permute_mode operator|(permute_mode a, permute_mode b) +{ + return static_cast(static_cast(a) | static_cast(b)); +} + + +/** Computes the intersection of two permutation modes. 
*/ +inline permute_mode operator&(permute_mode a, permute_mode b) +{ + return static_cast(static_cast(a) & static_cast(b)); +} + + +inline std::ostream& operator<<(std::ostream& stream, permute_mode mode) +{ + switch (mode) { + case permute_mode::none: + return stream << "none"; + case permute_mode::rows: + return stream << "rows"; + case permute_mode::columns: + return stream << "columns"; + case permute_mode::symmetric: + return stream << "symmetric"; + case permute_mode::inverse: + return stream << "inverse"; + case permute_mode::inverse_rows: + return stream << "inverse_rows"; + case permute_mode::inverse_columns: + return stream << "inverse_columns"; + case permute_mode::inverse_symmetric: + return stream << "inverse_symmetric"; + } + return stream; +} + + /** @internal std::bitset allows to store any number of bits */ using mask_type = gko::uint64; @@ -77,11 +149,14 @@ static constexpr mask_type inverse_permute = mask_type{1 << 3}; */ template class Permutation : public EnableLinOp>, - public EnableCreateMethod> { + public EnableCreateMethod>, + public WritableToMatrixData { friend class EnableCreateMethod; friend class EnablePolymorphicObject; public: + // value_type is only available to enable the usage of gko::write + using value_type = default_precision; using index_type = IndexType; /** @@ -110,7 +185,8 @@ class Permutation : public EnableLinOp>, * @return the number of elements explicitly stored in the permutation * array. */ - size_type get_permutation_size() const noexcept + [[deprecated("use get_size()[0] instead")]] size_type get_permutation_size() + const noexcept { return permutation_.get_num_elems(); } @@ -132,6 +208,16 @@ class Permutation : public EnableLinOp>, enabled_permute_ = permute_mask; } + /** + * Returns the inverse permutation. + * + * @return a newly created Permutation object storing the inverse + * permutation of this Permutation. + */ + std::unique_ptr invert() const; + + void write(gko::matrix_data& data) const override; + /** * Creates a constant (immutable) Permutation matrix from a constant array. * @@ -214,7 +300,7 @@ class Permutation : public EnableLinOp>, } } - void apply_impl(const LinOp* in, LinOp* out) const + void apply_impl(const LinOp* in, LinOp* out) const override { auto perm = as>(in); std::unique_ptr tmp{}; @@ -248,7 +334,7 @@ class Permutation : public EnableLinOp>, void apply_impl(const LinOp*, const LinOp* in, const LinOp*, - LinOp* out) const + LinOp* out) const override { // Ignores alpha and beta and just performs a normal permutation as an // advanced apply does not really make sense here. diff --git a/include/ginkgo/core/matrix/scaled_permutation.hpp b/include/ginkgo/core/matrix/scaled_permutation.hpp new file mode 100644 index 00000000000..0a5a2d781e7 --- /dev/null +++ b/include/ginkgo/core/matrix/scaled_permutation.hpp @@ -0,0 +1,177 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
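The permute_mode values above are plain bit masks, so the composite modes are bitwise combinations of rows, columns and inverse, and operator& can be used to test individual bits. A small self-contained check (illustrative only, not part of the patch):

    #include <cassert>
    #include <ginkgo/ginkgo.hpp>

    int main()
    {
        using gko::matrix::permute_mode;
        // rows = 0b001, columns = 0b010, inverse = 0b100
        assert((permute_mode::rows | permute_mode::columns) ==
               permute_mode::symmetric);
        assert((permute_mode::rows | permute_mode::inverse) ==
               permute_mode::inverse_rows);
        // extract a single bit from a combined mode
        auto mode = permute_mode::inverse_symmetric;
        assert((mode & permute_mode::rows) == permute_mode::rows);
    }

The new Permutation::invert() returns the functional inverse of the stored indices; for example, a permutation storing [1, 2, 0] inverts to [2, 0, 1].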
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_MATRIX_SCALED_PERMUTATION_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_SCALED_PERMUTATION_HPP_ + + +#include + + +#include +#include +#include +#include +#include + + +namespace gko { +namespace matrix { + + +/** + * ScaledPermutation is a matrix combining a permutation with scaling factors. + * It is a combination of Diagonal and Permutation, and can be read as + * $SP = S \cdot P$, i.e. the scaling gets applied after the permutation. + * + * @tparam IndexType index type of permutation indices + * @tparam ValueType value type of the scaling factors + * + * @ingroup permutation + * @ingroup mat_formats + * @ingroup LinOp + */ +template +class ScaledPermutation + : public EnableLinOp>, + public EnableCreateMethod>, + public WritableToMatrixData { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + +public: + using value_type = ValueType; + using index_type = IndexType; + + /** + * Returns a pointer to the scaling factors. + * + * @return the pointer to the scaling factors. + */ + value_type* get_scale() noexcept { return scale_.get_data(); } + + /** + * @copydoc get_scale() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_scale() const noexcept + { + return scale_.get_const_data(); + } + + /** + * Returns a pointer to the permutation indices. + * + * @return the pointer to the permutation indices. + */ + index_type* get_permutation() noexcept { return permutation_.get_data(); } + + /** + * @copydoc get_permutation() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type* get_const_permutation() const noexcept + { + return permutation_.get_const_data(); + } + + /** + * Returns the inverse scaled permutation. + * + * @return a newly created ScaledPermutation object storing the inverse + * permutation and scaling factors of this ScalingPermutation. + */ + std::unique_ptr invert() const; + + void write(gko::matrix_data& data) const override; + + /** + * Creates a constant (immutable) ScaledPermutation matrix from constant + * arrays. 
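Following the documentation above, a ScaledPermutation applies the permutation first and the scaling second (SP = S * P). A tiny worked example of that convention; the gather-style indexing mirrors the Permutation class and is an assumption here, not taken from the patch, and all values are made up:

    #include <cstdio>

    int main()
    {
        // y = S * (P x) with perm = {1, 2, 0} and scale = {2, 3, 4}
        const int perm[] = {1, 2, 0};
        const double scale[] = {2.0, 3.0, 4.0};
        const double x[] = {10.0, 20.0, 30.0};
        double y[3];
        for (int i = 0; i < 3; ++i) {
            y[i] = scale[i] * x[perm[i]];
        }
        std::printf("%g %g %g\n", y[0], y[1], y[2]);  // 40 90 40
    }

The invert() member then returns the ScaledPermutation that undoes this combined operation.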
+ * + * @param exec the executor to create the object on + * @param perm_idxs the permutation index array of the matrix + * @param scale the scaling factor array + * @returns A smart pointer to the constant matrix wrapping the input arrays + * (if it resides on the same executor as the matrix) or a copy of + * the arrays on the correct executor. + */ + static std::unique_ptr create_const( + std::shared_ptr exec, + gko::detail::const_array_view&& scale, + gko::detail::const_array_view&& perm_idxs); + +protected: + /** + * Creates an uninitialized ScaledPermutation matrix. + * + * @param exec Executor associated to the matrix + * @param size dimensions of the (square) scaled permutation matrix + */ + ScaledPermutation(std::shared_ptr exec, size_type size = 0); + + /** + * Creates a ScaledPermutation matrix from already allocated (and + * initialized) arrays. + * + * @param exec Executor associated to the matrix + * @param permutation_indices array of permutation indices + * @param scaling_factors array of scaling factors + */ + ScaledPermutation(std::shared_ptr exec, + array scaling_factors, + array permutation_indices); + + void apply_impl(const LinOp* in, LinOp* out) const override; + + + void apply_impl(const LinOp*, const LinOp* in, const LinOp*, + LinOp* out) const override; + + +private: + array scale_; + array permutation_; +}; + + +} // namespace matrix +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_MATRIX_SCALED_PERMUTATION_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index ad90e264189..baa5f5fd795 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -121,6 +121,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 1757b4b8a25..29459a264c4 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -909,6 +909,20 @@ void inv_symm_permute(std::shared_ptr exec, const IndexType* perm, const matrix::Csr* orig, matrix::Csr* permuted) +{ + inv_nonsymm_permute(exec, perm, perm, orig, permuted); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + + +template +void inv_nonsymm_permute(std::shared_ptr exec, + const IndexType* row_perm, + const IndexType* column_perm, + const matrix::Csr* orig, + matrix::Csr* permuted) { auto in_row_ptrs = orig->get_const_row_ptrs(); auto in_col_idxs = orig->get_const_col_idxs(); @@ -921,26 +935,26 @@ void inv_symm_permute(std::shared_ptr exec, #pragma omp parallel for for (size_type row = 0; row < num_rows; ++row) { auto src_row = row; - auto dst_row = perm[row]; + auto dst_row = row_perm[row]; p_row_ptrs[dst_row] = in_row_ptrs[src_row + 1] - in_row_ptrs[src_row]; } components::prefix_sum_nonnegative(exec, p_row_ptrs, num_rows + 1); #pragma omp parallel for for (size_type row = 0; row < num_rows; ++row) { auto src_row = row; - auto dst_row = perm[row]; + auto dst_row = row_perm[row]; auto src_begin = in_row_ptrs[src_row]; auto dst_begin = p_row_ptrs[dst_row]; auto row_size = in_row_ptrs[src_row + 1] - src_begin; for (IndexType i = 0; i < row_size; ++i) { - p_col_idxs[dst_begin + i] = perm[in_col_idxs[src_begin + i]]; + p_col_idxs[dst_begin + i] = column_perm[in_col_idxs[src_begin + i]]; p_vals[dst_begin + i] = in_vals[src_begin + i]; } } } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_NONSYMM_PERMUTE_KERNEL); template @@ -982,10 
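The OpenMP inv_nonsymm_permute above realizes the mapping out(row_perm[i], col_perm[j]) = in(i, j) in two passes: scatter the row lengths into their permuted positions, prefix-sum them into row pointers, then scatter each row while relabeling its columns. A dense-view toy check of that index mapping (illustrative only, made-up data):

    int main()
    {
        // out[row_perm[i]][col_perm[j]] = in[i][j]
        const int row_perm[] = {1, 2, 0};
        const int col_perm[] = {2, 0, 1};
        const double in[3][3] = {{1, 0, 2}, {0, 3, 0}, {4, 0, 0}};
        double out[3][3] = {};
        for (int i = 0; i < 3; ++i) {
            for (int j = 0; j < 3; ++j) {
                out[row_perm[i]][col_perm[j]] = in[i][j];
            }
        }
        // out == {{0, 0, 4}, {0, 2, 1}, {3, 0, 0}}
    }

Note that within one output row the entries keep their source order, so the permuted matrix may need a subsequent sort_by_column_index if sorted columns are required.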
+996,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void inverse_row_permute(std::shared_ptr exec, - const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* row_permuted) +void inv_row_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto orig_row_ptrs = orig->get_const_row_ptrs(); auto orig_col_idxs = orig->get_const_col_idxs(); @@ -1017,7 +1031,146 @@ void inverse_row_permute(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_ROW_PERMUTE_KERNEL); + + +template +void inv_symm_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + inv_nonsymm_scale_permute(exec, scale, perm, scale, perm, orig, permuted); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_SCALE_PERMUTE_KERNEL); + + +template +void inv_nonsymm_scale_permute(std::shared_ptr exec, + const ValueType* row_scale, + const IndexType* row_perm, + const ValueType* col_scale, + const IndexType* col_perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + auto in_row_ptrs = orig->get_const_row_ptrs(); + auto in_col_idxs = orig->get_const_col_idxs(); + auto in_vals = orig->get_const_values(); + auto p_row_ptrs = permuted->get_row_ptrs(); + auto p_col_idxs = permuted->get_col_idxs(); + auto p_vals = permuted->get_values(); + size_type num_rows = orig->get_size()[0]; + +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = row_perm[row]; + p_row_ptrs[dst_row] = in_row_ptrs[src_row + 1] - in_row_ptrs[src_row]; + } + components::prefix_sum_nonnegative(exec, p_row_ptrs, num_rows + 1); +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = row_perm[row]; + auto src_begin = in_row_ptrs[src_row]; + auto dst_begin = p_row_ptrs[dst_row]; + auto row_size = in_row_ptrs[src_row + 1] - src_begin; + for (IndexType i = 0; i < row_size; ++i) { + const auto in_col = in_col_idxs[src_begin + i]; + p_col_idxs[dst_begin + i] = col_perm[in_col]; + p_vals[dst_begin + i] = in_vals[src_begin + i] / + (row_scale[src_row] * col_scale[in_col]); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_NONSYMM_SCALE_PERMUTE_KERNEL); + + +template +void row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) +{ + auto orig_row_ptrs = orig->get_const_row_ptrs(); + auto orig_col_idxs = orig->get_const_col_idxs(); + auto orig_vals = orig->get_const_values(); + auto rp_row_ptrs = row_permuted->get_row_ptrs(); + auto rp_col_idxs = row_permuted->get_col_idxs(); + auto rp_vals = row_permuted->get_values(); + size_type num_rows = orig->get_size()[0]; + +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = perm[row]; + auto dst_row = row; + rp_row_ptrs[dst_row] = + orig_row_ptrs[src_row + 1] - orig_row_ptrs[src_row]; + } + components::prefix_sum_nonnegative(exec, rp_row_ptrs, num_rows + 1); +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = perm[row]; + auto dst_row = row; + auto src_begin = orig_row_ptrs[src_row]; + auto dst_begin = rp_row_ptrs[dst_row]; + auto row_size = orig_row_ptrs[src_row + 1] - src_begin; + std::copy_n(orig_col_idxs + src_begin, row_size, + 
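In the scale-permute kernels above, the inverse variants divide each value by the scaling factors of its source row and column, out(row_perm[i], col_perm[j]) = in(i, j) / (row_scale[i] * col_scale[j]), while the forward row_scale_permute multiplies, out(i, j) = in(perm[i], j) * scale[i]. A one-dimensional sketch showing that the two row variants undo each other (illustrative only, made-up values):

    int main()
    {
        const int perm[] = {1, 0};
        const double scale[] = {2.0, 5.0};
        const double x[] = {10.0, 40.0};
        double fwd[2], back[2];
        for (int i = 0; i < 2; ++i) {
            fwd[i] = x[perm[i]] * scale[i];  // row_scale_permute: {80, 50}
        }
        for (int i = 0; i < 2; ++i) {
            back[perm[i]] = fwd[i] / scale[i];  // inv_row_scale_permute
        }
        // back == {10, 40} again
    }
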
rp_col_idxs + dst_begin); + for (IndexType i = 0; i < row_size; i++) { + rp_vals[i + dst_begin] = orig_vals[i + src_begin] * scale[dst_row]; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ROW_SCALE_PERMUTE_KERNEL); + + +template +void inv_row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) +{ + auto orig_row_ptrs = orig->get_const_row_ptrs(); + auto orig_col_idxs = orig->get_const_col_idxs(); + auto orig_vals = orig->get_const_values(); + auto rp_row_ptrs = row_permuted->get_row_ptrs(); + auto rp_col_idxs = row_permuted->get_col_idxs(); + auto rp_vals = row_permuted->get_values(); + size_type num_rows = orig->get_size()[0]; + +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + rp_row_ptrs[dst_row] = + orig_row_ptrs[src_row + 1] - orig_row_ptrs[src_row]; + } + components::prefix_sum_nonnegative(exec, rp_row_ptrs, num_rows + 1); +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + auto src_begin = orig_row_ptrs[src_row]; + auto dst_begin = rp_row_ptrs[dst_row]; + auto row_size = orig_row_ptrs[src_row + 1] - src_begin; + std::copy_n(orig_col_idxs + src_begin, row_size, + rp_col_idxs + dst_begin); + for (IndexType i = 0; i < row_size; i++) { + rp_vals[i + dst_begin] = orig_vals[i + src_begin] / scale[src_row]; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_ROW_SCALE_PERMUTE_KERNEL); template diff --git a/omp/test/reorder/rcm_kernels.cpp b/omp/test/reorder/rcm_kernels.cpp index d2996ffb319..48698ac1b49 100644 --- a/omp/test/reorder/rcm_kernels.cpp +++ b/omp/test/reorder/rcm_kernels.cpp @@ -118,8 +118,7 @@ class Rcm : public ::testing::Test { return false; } - const auto n = gko::as(reorder->get_permutation()) - ->get_permutation_size(); + const auto n = reorder->get_permutation()->get_size()[0]; auto degrees = std::vector(n); for (gko::size_type i = 0; i < n; ++i) { degrees[i] = @@ -198,8 +197,8 @@ class Rcm : public ::testing::Test { static bool is_rcm_ordered(std::shared_ptr mtx, std::shared_ptr reorder) { - const auto n = gko::as(reorder->get_permutation()) - ->get_permutation_size(); + const auto n = + gko::as(reorder->get_permutation())->get_size()[0]; const auto row_ptrs = mtx->get_const_row_ptrs(); const auto col_idxs = mtx->get_const_col_idxs(); auto degrees = std::vector(n); diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 21dfc0dfb5a..44ee564c16f 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -35,6 +35,8 @@ target_sources(ginkgo_reference matrix/fbcsr_kernels.cpp matrix/fft_kernels.cpp matrix/hybrid_kernels.cpp + matrix/permutation_kernels.cpp + matrix/scaled_permutation_kernels.cpp matrix/sellp_kernels.cpp matrix/sparsity_csr_kernels.cpp multigrid/pgm_kernels.cpp diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 3a05a09cd45..d87e72bc5ab 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -834,24 +834,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CONVERT_TO_HYBRID_KERNEL); -template -void invert_permutation(std::shared_ptr exec, - size_type size, const IndexType* permutation_indices, - IndexType* inv_permutation) +template +void inv_symm_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* 
permuted) { - for (IndexType i = 0; i < static_cast(size); ++i) { - inv_permutation[permutation_indices[i]] = i; - } + inv_nonsymm_permute(exec, perm, perm, orig, permuted); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); template -void inv_symm_permute(std::shared_ptr exec, - const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* permuted) +void inv_nonsymm_permute(std::shared_ptr exec, + const IndexType* row_perm, + const IndexType* column_perm, + const matrix::Csr* orig, + matrix::Csr* permuted) { auto in_row_ptrs = orig->get_const_row_ptrs(); auto in_col_idxs = orig->get_const_col_idxs(); @@ -863,25 +864,25 @@ void inv_symm_permute(std::shared_ptr exec, for (size_type row = 0; row < num_rows; ++row) { auto src_row = row; - auto dst_row = perm[row]; + auto dst_row = row_perm[row]; p_row_ptrs[dst_row] = in_row_ptrs[src_row + 1] - in_row_ptrs[src_row]; } components::prefix_sum_nonnegative(exec, p_row_ptrs, num_rows + 1); for (size_type row = 0; row < num_rows; ++row) { auto src_row = row; - auto dst_row = perm[row]; + auto dst_row = row_perm[row]; auto src_begin = in_row_ptrs[src_row]; auto dst_begin = p_row_ptrs[dst_row]; auto row_size = in_row_ptrs[src_row + 1] - src_begin; for (IndexType i = 0; i < row_size; ++i) { - p_col_idxs[dst_begin + i] = perm[in_col_idxs[src_begin + i]]; + p_col_idxs[dst_begin + i] = column_perm[in_col_idxs[src_begin + i]]; p_vals[dst_begin + i] = in_vals[src_begin + i]; } } } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_NONSYMM_PERMUTE_KERNEL); template @@ -920,10 +921,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void inverse_row_permute(std::shared_ptr exec, - const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* row_permuted) +void inv_row_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto in_row_ptrs = orig->get_const_row_ptrs(); auto in_col_idxs = orig->get_const_col_idxs(); @@ -951,21 +952,21 @@ void inverse_row_permute(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_ROW_PERMUTE_KERNEL); template -void inverse_column_permute(std::shared_ptr exec, - const IndexType* perm, - const matrix::Csr* orig, - matrix::Csr* column_permuted) +void inv_col_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* col_permuted) { auto in_row_ptrs = orig->get_const_row_ptrs(); auto in_col_idxs = orig->get_const_col_idxs(); auto in_vals = orig->get_const_values(); - auto cp_row_ptrs = column_permuted->get_row_ptrs(); - auto cp_col_idxs = column_permuted->get_col_idxs(); - auto cp_vals = column_permuted->get_values(); + auto cp_row_ptrs = col_permuted->get_row_ptrs(); + auto cp_col_idxs = col_permuted->get_col_idxs(); + auto cp_vals = col_permuted->get_values(); auto num_rows = orig->get_size()[0]; for (size_type row = 0; row < num_rows; ++row) { @@ -981,7 +982,167 @@ void inverse_column_permute(std::shared_ptr exec, } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_CSR_INV_COL_PERMUTE_KERNEL); + + +template +void inv_symm_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + 
inv_nonsymm_scale_permute(exec, scale, perm, scale, perm, orig, permuted); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_SCALE_PERMUTE_KERNEL); + + +template +void inv_nonsymm_scale_permute(std::shared_ptr exec, + const ValueType* row_scale, + const IndexType* row_perm, + const ValueType* col_scale, + const IndexType* col_perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + auto in_row_ptrs = orig->get_const_row_ptrs(); + auto in_col_idxs = orig->get_const_col_idxs(); + auto in_vals = orig->get_const_values(); + auto p_row_ptrs = permuted->get_row_ptrs(); + auto p_col_idxs = permuted->get_col_idxs(); + auto p_vals = permuted->get_values(); + size_type num_rows = orig->get_size()[0]; + + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = row_perm[row]; + p_row_ptrs[dst_row] = in_row_ptrs[src_row + 1] - in_row_ptrs[src_row]; + } + components::prefix_sum_nonnegative(exec, p_row_ptrs, num_rows + 1); + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = row_perm[row]; + auto src_begin = in_row_ptrs[src_row]; + auto dst_begin = p_row_ptrs[dst_row]; + auto row_size = in_row_ptrs[src_row + 1] - src_begin; + for (IndexType i = 0; i < row_size; ++i) { + const auto in_col = in_col_idxs[src_begin + i]; + p_col_idxs[dst_begin + i] = col_perm[in_col]; + p_vals[dst_begin + i] = in_vals[src_begin + i] / + (row_scale[src_row] * col_scale[in_col]); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_NONSYMM_SCALE_PERMUTE_KERNEL); + + +template +void row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) +{ + auto in_row_ptrs = orig->get_const_row_ptrs(); + auto in_col_idxs = orig->get_const_col_idxs(); + auto in_vals = orig->get_const_values(); + auto rp_row_ptrs = row_permuted->get_row_ptrs(); + auto rp_col_idxs = row_permuted->get_col_idxs(); + auto rp_vals = row_permuted->get_values(); + size_type num_rows = orig->get_size()[0]; + + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = perm[row]; + auto dst_row = row; + rp_row_ptrs[dst_row] = in_row_ptrs[src_row + 1] - in_row_ptrs[src_row]; + } + components::prefix_sum_nonnegative(exec, rp_row_ptrs, num_rows + 1); + for (size_type row = 0; row < num_rows; ++row) { + const auto src_row = perm[row]; + const auto dst_row = row; + const auto src_begin = in_row_ptrs[src_row]; + const auto dst_begin = rp_row_ptrs[dst_row]; + const auto row_size = in_row_ptrs[src_row + 1] - src_begin; + std::copy_n(in_col_idxs + src_begin, row_size, rp_col_idxs + dst_begin); + for (IndexType i = 0; i < row_size; i++) { + rp_vals[i + dst_begin] = in_vals[i + src_begin] * scale[dst_row]; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ROW_SCALE_PERMUTE_KERNEL); + + +template +void inv_row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) +{ + auto in_row_ptrs = orig->get_const_row_ptrs(); + auto in_col_idxs = orig->get_const_col_idxs(); + auto in_vals = orig->get_const_values(); + auto rp_row_ptrs = row_permuted->get_row_ptrs(); + auto rp_col_idxs = row_permuted->get_col_idxs(); + auto rp_vals = row_permuted->get_values(); + size_type num_rows = orig->get_size()[0]; + + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + rp_row_ptrs[dst_row] = in_row_ptrs[src_row + 
1] - in_row_ptrs[src_row]; + } + components::prefix_sum_nonnegative(exec, rp_row_ptrs, num_rows + 1); + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + auto src_begin = in_row_ptrs[src_row]; + auto dst_begin = rp_row_ptrs[dst_row]; + auto row_size = in_row_ptrs[src_row + 1] - src_begin; + std::copy_n(in_col_idxs + src_begin, row_size, rp_col_idxs + dst_begin); + for (IndexType i = 0; i < row_size; i++) { + rp_vals[i + dst_begin] = in_vals[i + src_begin] / scale[src_row]; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_ROW_SCALE_PERMUTE_KERNEL); + + +template +void inv_col_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* col_permuted) +{ + auto in_row_ptrs = orig->get_const_row_ptrs(); + auto in_col_idxs = orig->get_const_col_idxs(); + auto in_vals = orig->get_const_values(); + auto cp_row_ptrs = col_permuted->get_row_ptrs(); + auto cp_col_idxs = col_permuted->get_col_idxs(); + auto cp_vals = col_permuted->get_values(); + auto num_rows = orig->get_size()[0]; + + for (size_type row = 0; row < num_rows; ++row) { + auto row_begin = in_row_ptrs[row]; + auto row_end = in_row_ptrs[row + 1]; + cp_row_ptrs[row] = in_row_ptrs[row]; + for (auto k = row_begin; k < row_end; ++k) { + const auto in_col = in_col_idxs[k]; + cp_col_idxs[k] = perm[in_col]; + cp_vals[k] = in_vals[k] / scale[in_col]; + } + } + cp_row_ptrs[num_rows] = in_row_ptrs[num_rows]; +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_COL_SCALE_PERMUTE_KERNEL); template diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 47df46b3c86..3b28336db11 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -862,11 +862,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); template void symm_permute(std::shared_ptr exec, - const array* permutation_indices, - const matrix::Dense* orig, + const IndexType* perm, const matrix::Dense* orig, matrix::Dense* permuted) { - auto perm = permutation_indices->get_const_data(); auto size = orig->get_size()[0]; for (size_type i = 0; i < size; ++i) { for (size_type j = 0; j < size; ++j) { @@ -881,11 +879,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_symm_permute(std::shared_ptr exec, - const array* permutation_indices, + const IndexType* perm, const matrix::Dense* orig, matrix::Dense* permuted) { - auto perm = permutation_indices->get_const_data(); auto size = orig->get_size()[0]; for (size_type i = 0; i < size; ++i) { for (size_type j = 0; j < size; ++j) { @@ -898,14 +895,46 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); +template +void nonsymm_permute(std::shared_ptr exec, + const IndexType* row_perm, const IndexType* col_perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + permuted->at(i, j) = orig->at(row_perm[i], col_perm[j]); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_NONSYMM_PERMUTE_KERNEL); + + +template +void inv_nonsymm_permute(std::shared_ptr exec, + const IndexType* row_perm, const IndexType* col_perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + 
permuted->at(row_perm[i], col_perm[j]) = orig->at(i, j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_NONSYMM_PERMUTE_KERNEL); + + template void row_gather(std::shared_ptr exec, - const array* row_idxs, - const matrix::Dense* orig, + const IndexType* rows, const matrix::Dense* orig, matrix::Dense* row_collection) { - auto rows = row_idxs->get_const_data(); - for (size_type i = 0; i < row_idxs->get_num_elems(); ++i) { + for (size_type i = 0; i < row_collection->get_size()[0]; ++i) { for (size_type j = 0; j < orig->get_size()[1]; ++j) { row_collection->at(i, j) = orig->at(rows[i], j); } @@ -919,16 +948,15 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( template void advanced_row_gather(std::shared_ptr exec, const matrix::Dense* alpha, - const array* row_idxs, + const IndexType* rows, const matrix::Dense* orig, const matrix::Dense* beta, matrix::Dense* row_collection) { using type = highest_precision; - auto rows = row_idxs->get_const_data(); auto scalar_alpha = alpha->at(0, 0); auto scalar_beta = beta->at(0, 0); - for (size_type i = 0; i < row_idxs->get_num_elems(); ++i) { + for (size_type i = 0; i < row_collection->get_size()[0]; ++i) { for (size_type j = 0; j < orig->get_size()[1]; ++j) { row_collection->at(i, j) = static_cast(scalar_alpha * orig->at(rows[i], j)) + @@ -943,30 +971,27 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( template -void column_permute(std::shared_ptr exec, - const array* permutation_indices, - const matrix::Dense* orig, - matrix::Dense* column_permuted) +void col_permute(std::shared_ptr exec, + const IndexType* perm, const matrix::Dense* orig, + matrix::Dense* col_permuted) { - auto perm = permutation_indices->get_const_data(); for (size_type j = 0; j < orig->get_size()[1]; ++j) { for (size_type i = 0; i < orig->get_size()[0]; ++i) { - column_permuted->at(i, j) = orig->at(i, perm[j]); + col_permuted->at(i, j) = orig->at(i, perm[j]); } } } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL); template -void inverse_row_permute(std::shared_ptr exec, - const array* permutation_indices, - const matrix::Dense* orig, - matrix::Dense* row_permuted) +void inv_row_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* row_permuted) { - auto perm = permutation_indices->get_const_data(); for (size_type i = 0; i < orig->get_size()[0]; ++i) { for (size_type j = 0; j < orig->get_size()[1]; ++j) { row_permuted->at(perm[i], j) = orig->at(i, j); @@ -979,21 +1004,166 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void inverse_column_permute(std::shared_ptr exec, - const array* permutation_indices, +void inv_col_permute(std::shared_ptr exec, + const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* col_permuted) +{ + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + col_permuted->at(i, perm[j]) = orig->at(i, j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_COL_PERMUTE_KERNEL); + + +template +void symm_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + permuted->at(i, j) = + scale[i] * scale[j] * orig->at(perm[i], perm[j]); + } + } +} + 
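// --- Editor's note (not part of the patch): the dense scaled-permutation
// kernels around this hunk follow the pattern of symm_scale_permute above.
// With S = diag(scale) and P the permutation matrix defined by
// P[i, perm[i]] = 1, the forward symmetric kernel computes
//     permuted(i, j) = scale[i] * scale[j] * orig(perm[i], perm[j]),
// i.e. permuted = (S P) A (S P)^T, while the inverse variant further below
// scatters instead of gathering and divides by the scaling factors,
// i.e. permuted = (S P)^-1 A (S P)^-T.
//
// Hedged usage sketch; the template arguments and concrete values are
// assumptions (they mirror the fixtures added to the tests later in this
// patch), not something this hunk spells out:
//
//     auto scale_perm = gko::matrix::ScaledPermutation<double, int>::create(
//         exec, gko::array<double>{exec, {2.0, 3.0, 5.0}},
//         gko::array<int>{exec, {1, 2, 0}});
//     auto permuted = mtx->scale_permute(scale_perm,
//                                        gko::matrix::permute_mode::symmetric);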
+GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_SCALE_PERMUTE_KERNEL); + + +template +void inv_symm_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, const matrix::Dense* orig, - matrix::Dense* column_permuted) + matrix::Dense* permuted) +{ + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + permuted->at(perm[i], perm[j]) = + orig->at(i, j) / (scale[i] * scale[j]); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_SCALE_PERMUTE_KERNEL); + + +template +void nonsymm_scale_permute(std::shared_ptr exec, + const ValueType* row_scale, + const IndexType* row_perm, + const ValueType* col_scale, + const IndexType* col_perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + permuted->at(i, j) = row_scale[i] * col_scale[j] * + orig->at(row_perm[i], col_perm[j]); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_NONSYMM_SCALE_PERMUTE_KERNEL); + + +template +void inv_nonsymm_scale_permute(std::shared_ptr exec, + const ValueType* row_scale, + const IndexType* row_perm, + const ValueType* col_scale, + const IndexType* col_perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + permuted->at(row_perm[i], col_perm[j]) = + orig->at(i, j) / (row_scale[i] * col_scale[j]); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_NONSYMM_SCALE_PERMUTE_KERNEL); + + +template +void row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + permuted->at(i, j) = scale[i] * orig->at(perm[i], j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_ROW_SCALE_PERMUTE_KERNEL); + + +template +void inv_row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + permuted->at(perm[i], j) = orig->at(i, j) / scale[i]; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_ROW_SCALE_PERMUTE_KERNEL); + + +template +void col_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) +{ + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + permuted->at(i, j) = scale[j] * orig->at(i, perm[j]); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_COL_SCALE_PERMUTE_KERNEL); + + +template +void inv_col_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Dense* orig, + matrix::Dense* permuted) { - auto perm = permutation_indices->get_const_data(); for (size_type j = 0; j < orig->get_size()[1]; ++j) { for (size_type i = 0; i < orig->get_size()[0]; ++i) { - column_permuted->at(i, perm[j]) = orig->at(i, j); + permuted->at(i, perm[j]) = orig->at(i, j) / scale[j]; } } } 
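// --- Editor's note (not part of the patch): the kernels above come in
// forward/inverse pairs -- forward variants gather through `perm` and
// multiply by `scale`, inverse variants scatter through `perm` and divide
// by `scale` -- so applying a mode followed by the same mode combined with
// the inverse flag reproduces the original matrix, which is what the
// *Roundtrip tests further down verify.
//
// Sketch of how the permute_mode flags appear to combine, inferred from the
// bitwise operations in the test helpers below; the exact enum values are an
// assumption, not spelled out in this patch:
//
//     using gko::matrix::permute_mode;
//     // symmetric permutes rows and columns at once:
//     //   permute_mode::symmetric == permute_mode::rows | permute_mode::columns
//     // inverse_* variants add the inverse flag:
//     //   permute_mode::inverse_rows == permute_mode::rows | permute_mode::inverse
//     auto roundtrip =
//         mtx->permute(perm, permute_mode::rows)
//            ->permute(perm, permute_mode::rows | permute_mode::inverse);
//     // roundtrip now equals mtx (up to rounding for the scaled variants)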
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL); + GKO_DECLARE_DENSE_INV_COL_SCALE_PERMUTE_KERNEL); template diff --git a/reference/matrix/permutation_kernels.cpp b/reference/matrix/permutation_kernels.cpp new file mode 100644 index 00000000000..cc7a81a1044 --- /dev/null +++ b/reference/matrix/permutation_kernels.cpp @@ -0,0 +1,58 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/permutation_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +namespace permutation { + + +template +void invert(std::shared_ptr exec, + const IndexType* permutation, size_type size, + IndexType* output_permutation) +{ + for (size_type i = 0; i < size; i++) { + output_permutation[permutation[i]] = i; + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PERMUTATION_INVERT_KERNEL); + + +} // namespace permutation +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/matrix/scaled_permutation_kernels.cpp b/reference/matrix/scaled_permutation_kernels.cpp new file mode 100644 index 00000000000..54a68fbdf0a --- /dev/null +++ b/reference/matrix/scaled_permutation_kernels.cpp @@ -0,0 +1,64 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/scaled_permutation_kernels.hpp" + + +#include + + +namespace gko { +namespace kernels { +namespace reference { +namespace scaled_permutation { + + +template +void invert(std::shared_ptr exec, + const IndexType* input_permutation, const ValueType* input_scale, + size_type size, IndexType* output_permutation, + ValueType* output_scale) +{ + for (size_type i = 0; i < size; i++) { + output_permutation[input_permutation[i]] = i; + output_scale[input_permutation[i]] = one() / input_scale[i]; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL); + + +} // namespace scaled_permutation +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 05498cbadc4..6f3348da432 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -10,6 +10,7 @@ ginkgo_create_test(fft_kernels) ginkgo_create_test(hybrid_kernels) ginkgo_create_test(identity) ginkgo_create_test(permutation) +ginkgo_create_test(scaled_permutation) ginkgo_create_test(sellp_kernels) ginkgo_create_test(sparsity_csr) ginkgo_create_test(sparsity_csr_kernels) diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index d0265e462f2..f388922f05d 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -49,6 +49,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include +#include #include #include @@ -77,6 +79,8 @@ class Csr : public ::testing::Test { using Hybrid = gko::matrix::Hybrid; using Vec = gko::matrix::Dense; using MixedVec = gko::matrix::Dense>; + using Perm = gko::matrix::Permutation; + using ScaledPerm = gko::matrix::ScaledPermutation; Csr() : exec(gko::ReferenceExecutor::create()), @@ -88,7 +92,21 @@ class Csr : public ::testing::Test { std::make_shared())), mtx3_unsorted( Mtx::create(exec, gko::dim<2>(3, 3), 7, - std::make_shared())) + std::make_shared())), + perm3(Perm::create(exec, 3, gko::array{exec, {1, 2, 0}})), + perm3_rev(perm3->invert()), + perm2(Perm::create(exec, 2, gko::array{exec, {1, 0}})), + perm0(Perm::create(exec)), + scale_perm3(ScaledPerm::create( + exec, gko::array{this->exec, {2.0, 3.0, 5.0}}, + gko::array{exec, {1, 2, 0}})), + scale_perm3_rev(ScaledPerm::create( + exec, gko::array{this->exec, {7.0, 11.0, 13.0}}, + gko::array{exec, {1, 2, 0}})), + scale_perm2(ScaledPerm::create( + exec, gko::array{this->exec, {17.0, 19.0}}, + gko::array{exec, {1, 0}})), + scale_perm0(ScaledPerm::create(exec)) { this->create_mtx(mtx.get()); this->create_mtx2(mtx2.get()); @@ -350,6 +368,14 @@ class Csr : public ::testing::Test { std::unique_ptr mtx2; std::unique_ptr mtx3_sorted; std::unique_ptr mtx3_unsorted; + std::unique_ptr perm3; + std::unique_ptr perm3_rev; + std::unique_ptr perm2; + std::unique_ptr perm0; + std::unique_ptr scale_perm3; + std::unique_ptr scale_perm3_rev; + std::unique_ptr scale_perm2; + std::unique_ptr scale_perm0; index_type invalid_index = gko::invalid_index(); }; @@ -1285,6 +1311,439 @@ TYPED_TEST(Csr, NonSquareMtxIsTransposable) } +template +std::unique_ptr> csr_from_permutation( + gko::matrix::Permutation* perm, bool invert) +{ + gko::matrix_data double_data; + if (invert) { + perm->invert()->write(double_data); + } else { + perm->write(double_data); + } + gko::matrix_data data; + data.size = double_data.size; + for (auto entry : double_data.nonzeros) { + data.nonzeros.emplace_back(entry.row, entry.column, 1.0); + } + auto mtx = + gko::matrix::Csr::create(perm->get_executor()); + mtx->read(data); + return mtx; +} + + +template +std::unique_ptr> csr_from_permutation( + gko::matrix::ScaledPermutation* perm, bool invert) +{ + gko::matrix_data data; + if (invert) { + perm->invert()->write(data); + } else { + perm->write(data); + } + auto mtx = + gko::matrix::Csr::create(perm->get_executor()); + mtx->read(data); + return mtx; +} + + +template +std::unique_ptr> ref_permute( + gko::matrix::Csr* input, Permutation* permutation, + gko::matrix::permute_mode mode) +{ + using gko::matrix::permute_mode; + using Csr = gko::matrix::Csr; + auto result = input->clone(); + auto permutation_csr = csr_from_permutation( + permutation, (mode & permute_mode::inverse) == permute_mode::inverse); + if ((mode & permute_mode::rows) == permute_mode::rows) { + // compute P * A + permutation_csr->apply(input, result); + } + if ((mode & permute_mode::columns) == permute_mode::columns) { + // compute A * P^T = (P * A^T)^T + auto tmp = result->transpose(); + auto tmp2 = gko::as(tmp->clone()); + permutation_csr->apply(tmp, tmp2); + result = gko::as(tmp2->transpose()); + } + return result; +} + + +template +std::unique_ptr> ref_permute( + gko::matrix::Csr* input, Permutation* row_permutation, + Permutation* col_permutation, bool invert) +{ + using gko::matrix::permute_mode; + using Csr = gko::matrix::Csr; + auto result = input->clone(); + auto row_permutation_csr = + csr_from_permutation(row_permutation, invert); 
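// --- Editor's note (not part of the patch): this reference helper checks the
// new Csr permute kernels against explicit products with permutation matrices.
// With P_r and P_c the CSR forms of the (possibly inverted) row and column
// permutations built here, the statements below compute
//     result = P_r * A             (row permutation applied from the left)
//     result = (P_c * result^T)^T  (column permutation, i.e. result * P_c^T)
// so the reference result is P_r * A * P_c^T.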
+ auto col_permutation_csr = + csr_from_permutation(col_permutation, invert); + row_permutation_csr->apply(input, result); + auto tmp = result->transpose(); + auto tmp2 = gko::as(tmp->clone()); + col_permutation_csr->apply(tmp, tmp2); + return gko::as(tmp2->transpose()); +} + + +TYPED_TEST(Csr, Permute) +{ + using gko::matrix::permute_mode; + + for (auto mode : + {permute_mode::none, permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + + auto permuted = this->mtx3_sorted->permute(this->perm3, mode); + auto ref_permuted = + ref_permute(this->mtx3_sorted.get(), this->perm3.get(), mode); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, ref_permuted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); + } +} + + +TYPED_TEST(Csr, PermuteRoundtrip) +{ + using gko::matrix::permute_mode; + + for (auto mode : + {permute_mode::rows, permute_mode::columns, permute_mode::symmetric}) { + SCOPED_TRACE(mode); + + auto permuted = + this->mtx3_sorted->permute(this->perm3, mode) + ->permute(this->perm3, mode | permute_mode::inverse); + + GKO_ASSERT_MTX_NEAR(this->mtx3_sorted, permuted, 0.0); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, this->mtx3_sorted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); + } +} + + +TYPED_TEST(Csr, PermuteRectangular) +{ + using gko::matrix::permute_mode; + + auto rpermuted = this->mtx2->permute(this->perm2, permute_mode::rows); + auto irpermuted = + this->mtx2->permute(this->perm2, permute_mode::inverse_rows); + auto cpermuted = this->mtx2->permute(this->perm3, permute_mode::columns); + auto icpermuted = + this->mtx2->permute(this->perm3, permute_mode::inverse_columns); + auto ref_rpermuted = + ref_permute(this->mtx2.get(), this->perm2.get(), permute_mode::rows); + auto ref_irpermuted = ref_permute(this->mtx2.get(), this->perm2.get(), + permute_mode::inverse_rows); + auto ref_cpermuted = + ref_permute(this->mtx2.get(), this->perm3.get(), permute_mode::columns); + auto ref_icpermuted = ref_permute(this->mtx2.get(), this->perm3.get(), + permute_mode::inverse_columns); + + GKO_ASSERT_MTX_NEAR(rpermuted, ref_rpermuted, 0.0); + GKO_ASSERT_MTX_NEAR(irpermuted, ref_irpermuted, 0.0); + GKO_ASSERT_MTX_NEAR(cpermuted, ref_cpermuted, 0.0); + GKO_ASSERT_MTX_NEAR(icpermuted, ref_icpermuted, 0.0); + GKO_ASSERT_MTX_EQ_SPARSITY(rpermuted, ref_rpermuted); + GKO_ASSERT_MTX_EQ_SPARSITY(irpermuted, ref_irpermuted); + GKO_ASSERT_MTX_EQ_SPARSITY(cpermuted, ref_cpermuted); + GKO_ASSERT_MTX_EQ_SPARSITY(icpermuted, ref_icpermuted); + ASSERT_TRUE(rpermuted->is_sorted_by_column_index()); + ASSERT_TRUE(irpermuted->is_sorted_by_column_index()); + ASSERT_TRUE(cpermuted->is_sorted_by_column_index()); + ASSERT_TRUE(icpermuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, PermuteFailsWithIncorrectPermutationSize) +{ + using gko::matrix::permute_mode; + + for (auto mode : + {/* no permute_mode::none */ permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + + ASSERT_THROW(this->mtx3_sorted->permute(this->perm0, mode), + gko::ValueMismatch); + } +} + + +TYPED_TEST(Csr, NonsymmPermute) +{ + auto permuted = this->mtx3_sorted->permute(this->perm3, this->perm3_rev); + auto ref_permuted = ref_permute(this->mtx3_sorted.get(), this->perm3.get(), + this->perm3_rev.get(), false); + + 
GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, ref_permuted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, NonsymmPermuteInverse) +{ + auto permuted = + this->mtx3_sorted->permute(this->perm3, this->perm3_rev, true); + auto ref_permuted = ref_permute(this->mtx3_sorted.get(), this->perm3.get(), + this->perm3_rev.get(), true); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, ref_permuted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, NonsymmPermuteRectangular) +{ + auto permuted = this->mtx2->permute(this->perm2, this->perm3); + auto ref_permuted = ref_permute(this->mtx2.get(), this->perm2.get(), + this->perm3.get(), false); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, ref_permuted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, NonsymmPermuteInverseRectangular) +{ + auto permuted = this->mtx2->permute(this->perm2, this->perm3, true); + auto ref_permuted = ref_permute(this->mtx2.get(), this->perm2.get(), + this->perm3.get(), true); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, ref_permuted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, NonsymmPermuteRoundtrip) +{ + auto permuted = this->mtx3_sorted->permute(this->perm3, this->perm3_rev) + ->permute(this->perm3, this->perm3_rev, true); + + GKO_ASSERT_MTX_NEAR(this->mtx3_sorted, permuted, 0.0); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, this->mtx3_sorted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, NonsymmPermuteFailsWithIncorrectPermutationSize) +{ + ASSERT_THROW(this->mtx3_sorted->permute(this->perm0, this->perm3_rev), + gko::ValueMismatch); + ASSERT_THROW(this->mtx3_sorted->permute(this->perm3_rev, this->perm0), + gko::ValueMismatch); + ASSERT_THROW(this->mtx3_sorted->permute(this->perm0, this->perm0), + gko::ValueMismatch); +} + + +TYPED_TEST(Csr, ScaledPermute) +{ + using gko::matrix::permute_mode; + using value_type = typename TestFixture::value_type; + + for (auto mode : + {permute_mode::none, permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + + auto permuted = + this->mtx3_sorted->scale_permute(this->scale_perm3, mode); + auto ref_permuted = + ref_permute(this->mtx3_sorted.get(), this->scale_perm3.get(), mode); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, ref_permuted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); + } +} + + +TYPED_TEST(Csr, ScaledPermuteRoundtrip) +{ + using gko::matrix::permute_mode; + using value_type = typename TestFixture::value_type; + + for (auto mode : + {permute_mode::rows, permute_mode::columns, permute_mode::symmetric}) { + SCOPED_TRACE(mode); + + auto permuted = + this->mtx3_sorted->scale_permute(this->scale_perm3, mode) + ->scale_permute(this->scale_perm3, + mode | permute_mode::inverse); + + GKO_ASSERT_MTX_NEAR(this->mtx3_sorted, permuted, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, this->mtx3_sorted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); + } +} + + +TYPED_TEST(Csr, ScaledPermuteRectangular) +{ + using gko::matrix::permute_mode; + using value_type = typename TestFixture::value_type; + + auto rpermuted = + 
this->mtx2->scale_permute(this->scale_perm2, permute_mode::rows); + auto irpermuted = this->mtx2->scale_permute(this->scale_perm2, + permute_mode::inverse_rows); + auto cpermuted = + this->mtx2->scale_permute(this->scale_perm3, permute_mode::columns); + auto icpermuted = this->mtx2->scale_permute(this->scale_perm3, + permute_mode::inverse_columns); + auto ref_rpermuted = ref_permute(this->mtx2.get(), this->scale_perm2.get(), + permute_mode::rows); + auto ref_irpermuted = ref_permute(this->mtx2.get(), this->scale_perm2.get(), + permute_mode::inverse_rows); + auto ref_cpermuted = ref_permute(this->mtx2.get(), this->scale_perm3.get(), + permute_mode::columns); + auto ref_icpermuted = ref_permute(this->mtx2.get(), this->scale_perm3.get(), + permute_mode::inverse_columns); + + GKO_ASSERT_MTX_NEAR(rpermuted, ref_rpermuted, r::value); + GKO_ASSERT_MTX_NEAR(irpermuted, ref_irpermuted, r::value); + GKO_ASSERT_MTX_NEAR(cpermuted, ref_cpermuted, r::value); + GKO_ASSERT_MTX_NEAR(icpermuted, ref_icpermuted, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(rpermuted, ref_rpermuted); + GKO_ASSERT_MTX_EQ_SPARSITY(irpermuted, ref_irpermuted); + GKO_ASSERT_MTX_EQ_SPARSITY(cpermuted, ref_cpermuted); + GKO_ASSERT_MTX_EQ_SPARSITY(icpermuted, ref_icpermuted); + ASSERT_TRUE(rpermuted->is_sorted_by_column_index()); + ASSERT_TRUE(irpermuted->is_sorted_by_column_index()); + ASSERT_TRUE(cpermuted->is_sorted_by_column_index()); + ASSERT_TRUE(icpermuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, ScaledPermuteFailsWithIncorrectPermutationSize) +{ + using gko::matrix::permute_mode; + + for (auto mode : + {/* no permute_mode::none */ permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + + ASSERT_THROW(this->mtx3_sorted->scale_permute(this->scale_perm0, mode), + gko::ValueMismatch); + } +} + + +TYPED_TEST(Csr, NonsymmScaledPermute) +{ + using value_type = typename TestFixture::value_type; + + auto permuted = this->mtx3_sorted->scale_permute(this->scale_perm3, + this->scale_perm3_rev); + auto ref_permuted = + ref_permute(this->mtx3_sorted.get(), this->scale_perm3.get(), + this->scale_perm3_rev.get(), false); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, ref_permuted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, NonsymmScaledPermuteInverse) +{ + using value_type = typename TestFixture::value_type; + + auto permuted = this->mtx3_sorted->scale_permute( + this->scale_perm3, this->scale_perm3_rev, true); + auto ref_permuted = + ref_permute(this->mtx3_sorted.get(), this->scale_perm3.get(), + this->scale_perm3_rev.get(), true); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, ref_permuted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, NonsymmScaledPermuteRectangular) +{ + using value_type = typename TestFixture::value_type; + + auto permuted = + this->mtx2->scale_permute(this->scale_perm2, this->scale_perm3); + auto ref_permuted = ref_permute(this->mtx2.get(), this->scale_perm2.get(), + this->scale_perm3.get(), false); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, ref_permuted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, NonsymmScaledPermuteInverseRectangular) +{ + using value_type = typename TestFixture::value_type; + + auto permuted = + 
this->mtx2->scale_permute(this->scale_perm2, this->scale_perm3, true); + auto ref_permuted = ref_permute(this->mtx2.get(), this->scale_perm2.get(), + this->scale_perm3.get(), true); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, ref_permuted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, NonsymmScaledPermuteRoundtrip) +{ + using value_type = typename TestFixture::value_type; + + auto permuted = + this->mtx3_sorted + ->scale_permute(this->scale_perm3, this->scale_perm3_rev) + ->scale_permute(this->scale_perm3, this->scale_perm3_rev, true); + + GKO_ASSERT_MTX_NEAR(this->mtx3_sorted, permuted, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, this->mtx3_sorted); + ASSERT_TRUE(permuted->is_sorted_by_column_index()); +} + + +TYPED_TEST(Csr, NonsymmScaledPermuteFailsWithIncorrectPermutationSize) +{ + ASSERT_THROW(this->mtx3_sorted->scale_permute(this->scale_perm0, + this->scale_perm3_rev), + gko::ValueMismatch); + ASSERT_THROW(this->mtx3_sorted->scale_permute(this->scale_perm3_rev, + this->scale_perm0), + gko::ValueMismatch); + ASSERT_THROW( + this->mtx3_sorted->scale_permute(this->scale_perm0, this->scale_perm0), + gko::ValueMismatch); +} + + TYPED_TEST(Csr, SquareMatrixIsPermutable) { using Csr = typename TestFixture::Mtx; diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index b776f426794..a95359a0ac8 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -51,6 +51,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include +#include #include #include @@ -1348,6 +1350,40 @@ class DenseWithIndexType typename std::tuple_element<0, decltype(ValueIndexType())>::type; using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; + using Permutation = gko::matrix::Permutation; + using ScaledPermutation = + gko::matrix::ScaledPermutation; + + + DenseWithIndexType() + { + perm2 = Permutation::create(this->exec, + gko::array{this->exec, {1, 0}}); + perm3 = Permutation::create( + this->exec, gko::array{this->exec, {1, 2, 0}}); + perm3_rev = Permutation::create( + this->exec, gko::array{this->exec, {2, 0, 1}}); + perm0 = Permutation::create(this->exec, 0); + scale_perm2 = ScaledPermutation::create( + this->exec, gko::array{this->exec, {17.0, 19.0}}, + gko::array{this->exec, {1, 0}}); + scale_perm3 = ScaledPermutation::create( + this->exec, gko::array{this->exec, {2.0, 3.0, 5.0}}, + gko::array{this->exec, {1, 2, 0}}); + scale_perm3_rev = ScaledPermutation::create( + this->exec, gko::array{this->exec, {7.0, 11.0, 13.0}}, + gko::array{this->exec, {2, 0, 1}}); + scale_perm0 = ScaledPermutation::create(this->exec, 0); + } + + std::unique_ptr perm2; + std::unique_ptr perm3; + std::unique_ptr perm3_rev; + std::unique_ptr perm0; + std::unique_ptr scale_perm2; + std::unique_ptr scale_perm3; + std::unique_ptr scale_perm3_rev; + std::unique_ptr scale_perm0; }; TYPED_TEST_SUITE(DenseWithIndexType, gko::test::ValueIndexTypes, @@ -2230,6 +2266,286 @@ TYPED_TEST(DenseWithIndexType, MovesEmptyToSellp) } +template +std::unique_ptr> ref_permute( + gko::matrix::Dense* input, + gko::matrix::Permutation* permutation, + gko::matrix::permute_mode mode) +{ + using gko::matrix::permute_mode; + auto result = input->clone(); + auto permutation_dense = + gko::matrix::Dense::create(input->get_executor()); + gko::matrix_data permutation_data; + if ((mode & permute_mode::inverse) == 
permute_mode::inverse) { + permutation->invert()->write(permutation_data); + } else { + permutation->write(permutation_data); + } + permutation_dense->read(permutation_data); + if ((mode & permute_mode::rows) == permute_mode::rows) { + // compute P * A + permutation_dense->apply(input, result); + } + if ((mode & permute_mode::columns) == permute_mode::columns) { + // compute A * P^T = (P * A^T)^T + auto tmp = result->transpose(); + auto tmp2 = gko::as>(tmp->clone()); + permutation_dense->apply(tmp, tmp2); + tmp2->transpose(result); + } + return result; +} + + +template +std::unique_ptr> ref_permute( + gko::matrix::Dense* input, + gko::matrix::Permutation* row_permutation, + gko::matrix::Permutation* col_permutation, bool invert) +{ + using gko::matrix::permute_mode; + auto result = input->clone(); + auto row_permutation_dense = + gko::matrix::Dense::create(input->get_executor()); + auto col_permutation_dense = + gko::matrix::Dense::create(input->get_executor()); + gko::matrix_data row_permutation_data; + gko::matrix_data col_permutation_data; + if (invert) { + row_permutation->invert()->write(row_permutation_data); + col_permutation->invert()->write(col_permutation_data); + } else { + row_permutation->write(row_permutation_data); + col_permutation->write(col_permutation_data); + } + row_permutation_dense->read(row_permutation_data); + col_permutation_dense->read(col_permutation_data); + row_permutation_dense->apply(input, result); + auto tmp = result->transpose(); + auto tmp2 = gko::as>(tmp->clone()); + col_permutation_dense->apply(tmp, tmp2); + tmp2->transpose(result); + return result; +} + + +TYPED_TEST(DenseWithIndexType, Permute) +{ + using gko::matrix::permute_mode; + + for (auto mode : + {permute_mode::none, permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + + auto permuted = this->mtx5->permute(this->perm3, mode); + auto ref_permuted = + ref_permute(this->mtx5.get(), this->perm3.get(), mode); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + } +} + + +TYPED_TEST(DenseWithIndexType, PermuteRoundtrip) +{ + using gko::matrix::permute_mode; + + for (auto mode : + {permute_mode::rows, permute_mode::columns, permute_mode::symmetric}) { + SCOPED_TRACE(mode); + + auto permuted = + this->mtx5->permute(this->perm3, mode) + ->permute(this->perm3, mode | permute_mode::inverse); + + GKO_ASSERT_MTX_NEAR(this->mtx5, permuted, 0.0); + } +} + + +TYPED_TEST(DenseWithIndexType, PermuteStridedIntoDense) +{ + using gko::matrix::permute_mode; + using Mtx = typename TestFixture::Mtx; + auto mtx = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 1); + mtx->copy_from(this->mtx5); + + for (auto mode : + {permute_mode::none, permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse, + permute_mode::inverse_rows, permute_mode::inverse_columns, + permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + auto permuted = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 2); + + this->mtx5->permute(this->perm3, permuted, mode); + auto ref_permuted = + ref_permute(this->mtx5.get(), this->perm3.get(), mode); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + } +} + + +TYPED_TEST(DenseWithIndexType, PermuteRectangular) +{ + using gko::matrix::permute_mode; + + auto rpermuted = this->mtx1->permute(this->perm2, permute_mode::rows); + auto irpermuted = + 
this->mtx1->permute(this->perm2, permute_mode::inverse_rows); + auto cpermuted = this->mtx1->permute(this->perm3, permute_mode::columns); + auto icpermuted = + this->mtx1->permute(this->perm3, permute_mode::inverse_columns); + auto ref_rpermuted = + ref_permute(this->mtx1.get(), this->perm2.get(), permute_mode::rows); + auto ref_irpermuted = ref_permute(this->mtx1.get(), this->perm2.get(), + permute_mode::inverse_rows); + auto ref_cpermuted = + ref_permute(this->mtx1.get(), this->perm3.get(), permute_mode::columns); + auto ref_icpermuted = ref_permute(this->mtx1.get(), this->perm3.get(), + permute_mode::inverse_columns); + + GKO_ASSERT_MTX_NEAR(rpermuted, ref_rpermuted, 0.0); + GKO_ASSERT_MTX_NEAR(irpermuted, ref_irpermuted, 0.0); + GKO_ASSERT_MTX_NEAR(cpermuted, ref_cpermuted, 0.0); + GKO_ASSERT_MTX_NEAR(icpermuted, ref_icpermuted, 0.0); +} + + +TYPED_TEST(DenseWithIndexType, PermuteFailsWithIncorrectPermutationSize) +{ + using gko::matrix::permute_mode; + + for (auto mode : + {/* no permute_mode::none */ permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + + ASSERT_THROW(this->mtx5->permute(this->perm0, mode), + gko::ValueMismatch); + } +} + + +TYPED_TEST(DenseWithIndexType, PermuteFailsWithIncorrectOutputSize) +{ + using gko::matrix::permute_mode; + using Mtx = typename TestFixture::Mtx; + auto output = Mtx::create(this->exec); + + for (auto mode : + {permute_mode::none, permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + + ASSERT_THROW(this->mtx5->permute(this->perm3, output, mode), + gko::DimensionMismatch); + } +} + + +TYPED_TEST(DenseWithIndexType, NonsymmPermute) +{ + auto permuted = this->mtx5->permute(this->perm3, this->perm3_rev); + auto ref_permuted = ref_permute(this->mtx5.get(), this->perm3.get(), + this->perm3_rev.get(), false); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmPermuteInverse) +{ + auto permuted = this->mtx5->permute(this->perm3, this->perm3_rev, true); + auto ref_permuted = ref_permute(this->mtx5.get(), this->perm3.get(), + this->perm3_rev.get(), true); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmPermuteRectangular) +{ + auto permuted = this->mtx1->permute(this->perm2, this->perm3); + auto ref_permuted = ref_permute(this->mtx1.get(), this->perm2.get(), + this->perm3.get(), false); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmPermuteInverseRectangular) +{ + auto permuted = this->mtx1->permute(this->perm2, this->perm3, true); + auto ref_permuted = ref_permute(this->mtx1.get(), this->perm2.get(), + this->perm3.get(), true); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmPermuteRoundtrip) +{ + auto permuted = this->mtx5->permute(this->perm3, this->perm3_rev) + ->permute(this->perm3, this->perm3_rev, true); + + GKO_ASSERT_MTX_NEAR(this->mtx5, permuted, 0.0); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmPermuteStridedIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + auto mtx = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 1); + auto permuted = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 2); + 
mtx->copy_from(this->mtx5); + + mtx->permute(this->perm3, this->perm3_rev, permuted); + auto ref_permuted = ref_permute(this->mtx5.get(), this->perm3.get(), + this->perm3_rev.get(), false); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmPermuteInverseStridedIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + auto mtx = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 1); + auto permuted = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 2); + mtx->copy_from(this->mtx5); + + mtx->permute(this->perm3, this->perm3_rev, permuted, true); + auto ref_permuted = ref_permute(this->mtx5.get(), this->perm3.get(), + this->perm3_rev.get(), true); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmPermuteFailsWithIncorrectPermutationSize) +{ + ASSERT_THROW(this->mtx5->permute(this->perm0, this->perm3_rev), + gko::ValueMismatch); + ASSERT_THROW(this->mtx5->permute(this->perm3_rev, this->perm0), + gko::ValueMismatch); + ASSERT_THROW(this->mtx5->permute(this->perm0, this->perm0), + gko::ValueMismatch); +} + + TYPED_TEST(DenseWithIndexType, SquareMatrixCanGatherRows) { using Mtx = typename TestFixture::Mtx; @@ -2907,6 +3223,331 @@ TYPED_TEST(DenseWithIndexType, } +template +std::unique_ptr> ref_scaled_permute( + gko::matrix::Dense* input, + gko::matrix::ScaledPermutation* permutation, + gko::matrix::permute_mode mode) +{ + using gko::matrix::permute_mode; + auto result = input->clone(); + auto permutation_dense = + gko::matrix::Dense::create(input->get_executor()); + gko::matrix_data permutation_data; + if ((mode & permute_mode::inverse) == permute_mode::inverse) { + permutation->invert()->write(permutation_data); + } else { + permutation->write(permutation_data); + } + permutation_dense->read(permutation_data); + if ((mode & permute_mode::rows) == permute_mode::rows) { + // compute P * A + permutation_dense->apply(input, result); + } + if ((mode & permute_mode::columns) == permute_mode::columns) { + // compute A * P^T = (P * A^T)^T + auto tmp = result->transpose(); + auto tmp2 = gko::as>(tmp->clone()); + permutation_dense->apply(tmp, tmp2); + tmp2->transpose(result); + } + return result; +} + + +template +std::unique_ptr> ref_scaled_permute( + gko::matrix::Dense* input, + gko::matrix::ScaledPermutation* row_permutation, + gko::matrix::ScaledPermutation* col_permutation, + bool invert) +{ + using gko::matrix::permute_mode; + auto result = input->clone(); + auto row_permutation_dense = + gko::matrix::Dense::create(input->get_executor()); + auto col_permutation_dense = + gko::matrix::Dense::create(input->get_executor()); + gko::matrix_data row_permutation_data; + gko::matrix_data col_permutation_data; + if (invert) { + row_permutation->invert()->write(row_permutation_data); + col_permutation->invert()->write(col_permutation_data); + } else { + row_permutation->write(row_permutation_data); + col_permutation->write(col_permutation_data); + } + row_permutation_dense->read(row_permutation_data); + col_permutation_dense->read(col_permutation_data); + row_permutation_dense->apply(input, result); + auto tmp = result->transpose(); + auto tmp2 = gko::as>(tmp->clone()); + col_permutation_dense->apply(tmp, tmp2); + tmp2->transpose(result); + return result; +} + + +TYPED_TEST(DenseWithIndexType, ScaledPermute) +{ + using gko::matrix::permute_mode; + using value_type = typename TestFixture::value_type; + + for (auto mode : + {permute_mode::none, 
permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + + auto permuted = this->mtx5->scale_permute(this->scale_perm3, mode); + auto ref_permuted = + ref_scaled_permute(this->mtx5.get(), this->scale_perm3.get(), mode); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); + } +} + + +TYPED_TEST(DenseWithIndexType, ScaledPermuteRoundtrip) +{ + using gko::matrix::permute_mode; + using value_type = typename TestFixture::value_type; + + for (auto mode : + {permute_mode::rows, permute_mode::columns, permute_mode::symmetric}) { + SCOPED_TRACE(mode); + + auto permuted = this->mtx5->scale_permute(this->scale_perm3, mode) + ->scale_permute(this->scale_perm3, + mode | permute_mode::inverse); + + GKO_ASSERT_MTX_NEAR(this->mtx5, permuted, r::value); + } +} + + +TYPED_TEST(DenseWithIndexType, ScaledPermuteStridedIntoDense) +{ + using gko::matrix::permute_mode; + using value_type = typename TestFixture::value_type; + using Mtx = typename TestFixture::Mtx; + auto mtx = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 1); + mtx->copy_from(this->mtx5); + + for (auto mode : + {permute_mode::none, permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse, + permute_mode::inverse_rows, permute_mode::inverse_columns, + permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + auto permuted = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 2); + + this->mtx5->scale_permute(this->scale_perm3, permuted, mode); + auto ref_permuted = + ref_scaled_permute(this->mtx5.get(), this->scale_perm3.get(), mode); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); + } +} + + +TYPED_TEST(DenseWithIndexType, ScaledPermuteRectangular) +{ + using gko::matrix::permute_mode; + using value_type = typename TestFixture::value_type; + + auto rpermuted = + this->mtx1->scale_permute(this->scale_perm2, permute_mode::rows); + auto irpermuted = this->mtx1->scale_permute(this->scale_perm2, + permute_mode::inverse_rows); + auto cpermuted = + this->mtx1->scale_permute(this->scale_perm3, permute_mode::columns); + auto icpermuted = this->mtx1->scale_permute(this->scale_perm3, + permute_mode::inverse_columns); + auto ref_rpermuted = ref_scaled_permute( + this->mtx1.get(), this->scale_perm2.get(), permute_mode::rows); + auto ref_irpermuted = ref_scaled_permute( + this->mtx1.get(), this->scale_perm2.get(), permute_mode::inverse_rows); + auto ref_cpermuted = ref_scaled_permute( + this->mtx1.get(), this->scale_perm3.get(), permute_mode::columns); + auto ref_icpermuted = + ref_scaled_permute(this->mtx1.get(), this->scale_perm3.get(), + permute_mode::inverse_columns); + + GKO_ASSERT_MTX_NEAR(rpermuted, ref_rpermuted, r::value); + GKO_ASSERT_MTX_NEAR(irpermuted, ref_irpermuted, r::value); + GKO_ASSERT_MTX_NEAR(cpermuted, ref_cpermuted, r::value); + GKO_ASSERT_MTX_NEAR(icpermuted, ref_icpermuted, r::value); +} + + +TYPED_TEST(DenseWithIndexType, ScaledPermuteFailsWithIncorrectPermutationSize) +{ + using gko::matrix::permute_mode; + + for (auto mode : + {/* no permute_mode::none */ permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + + ASSERT_THROW(this->mtx5->scale_permute(this->scale_perm0, mode), + gko::ValueMismatch); + } +} + + +TYPED_TEST(DenseWithIndexType, 
ScaledPermuteFailsWithIncorrectOutputSize) +{ + using gko::matrix::permute_mode; + using Mtx = typename TestFixture::Mtx; + auto output = Mtx::create(this->exec); + + for (auto mode : + {permute_mode::none, permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + + ASSERT_THROW(this->mtx5->scale_permute(this->scale_perm3, output, mode), + gko::DimensionMismatch); + } +} + + +TYPED_TEST(DenseWithIndexType, NonsymmScaledPermute) +{ + using value_type = typename TestFixture::value_type; + + auto permuted = + this->mtx5->scale_permute(this->scale_perm3, this->scale_perm3_rev); + auto ref_permuted = + ref_scaled_permute(this->mtx5.get(), this->scale_perm3.get(), + this->scale_perm3_rev.get(), false); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmScaledPermuteInverse) +{ + using value_type = typename TestFixture::value_type; + + auto permuted = this->mtx5->scale_permute(this->scale_perm3, + this->scale_perm3_rev, true); + auto ref_permuted = + ref_scaled_permute(this->mtx5.get(), this->scale_perm3.get(), + this->scale_perm3_rev.get(), true); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmScaledPermuteRectangular) +{ + using value_type = typename TestFixture::value_type; + + auto permuted = + this->mtx1->scale_permute(this->scale_perm2, this->scale_perm3); + auto ref_permuted = + ref_scaled_permute(this->mtx1.get(), this->scale_perm2.get(), + this->scale_perm3.get(), false); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmScaledPermuteInverseRectangular) +{ + using value_type = typename TestFixture::value_type; + + auto permuted = + this->mtx1->scale_permute(this->scale_perm2, this->scale_perm3, true); + auto ref_permuted = + ref_scaled_permute(this->mtx1.get(), this->scale_perm2.get(), + this->scale_perm3.get(), true); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmScaledPermuteRoundtrip) +{ + using value_type = typename TestFixture::value_type; + + auto permuted = + this->mtx5->scale_permute(this->scale_perm3, this->scale_perm3_rev) + ->scale_permute(this->scale_perm3, this->scale_perm3_rev, true); + + GKO_ASSERT_MTX_NEAR(this->mtx5, permuted, r::value); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmScaledPermuteStridedIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + auto mtx = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 1); + auto permuted = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 2); + mtx->copy_from(this->mtx5); + + mtx->scale_permute(this->scale_perm3, this->scale_perm3_rev, permuted); + auto ref_permuted = + ref_scaled_permute(this->mtx5.get(), this->scale_perm3.get(), + this->scale_perm3_rev.get(), false); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmScaledPermuteInverseStridedIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using value_type = typename TestFixture::value_type; + auto mtx = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 1); + auto permuted = Mtx::create(this->exec, this->mtx5->get_size(), + this->mtx5->get_size()[1] + 2); + mtx->copy_from(this->mtx5); + + 
mtx->scale_permute(this->scale_perm3, this->scale_perm3_rev, permuted, + true); + auto ref_permuted = + ref_scaled_permute(this->mtx5.get(), this->scale_perm3.get(), + this->scale_perm3_rev.get(), true); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, r::value); +} + + +TYPED_TEST(DenseWithIndexType, NonsymmScaledPermuteFailsWithIncorrectOutputSize) +{ + ASSERT_THROW( + this->mtx5->scale_permute(this->scale_perm3, this->scale_perm3, + TestFixture::Mtx::create(this->exec)), + gko::DimensionMismatch); +} + + +TYPED_TEST(DenseWithIndexType, + NonsymmScaledPermuteFailsWithIncorrectPermutationSize) +{ + ASSERT_THROW( + this->mtx5->scale_permute(this->scale_perm0, this->scale_perm3_rev), + gko::ValueMismatch); + ASSERT_THROW( + this->mtx5->scale_permute(this->scale_perm3_rev, this->scale_perm0), + gko::ValueMismatch); + ASSERT_THROW( + this->mtx5->scale_permute(this->scale_perm0, this->scale_perm0), + gko::ValueMismatch); +} + + template class DenseComplex : public ::testing::Test { protected: diff --git a/reference/test/matrix/permutation.cpp b/reference/test/matrix/permutation.cpp index 2bd2e3d9741..65e092dfcd5 100644 --- a/reference/test/matrix/permutation.cpp +++ b/reference/test/matrix/permutation.cpp @@ -37,8 +37,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include #include @@ -51,12 +49,11 @@ namespace { template class Permutation : public ::testing::Test { protected: - using v_type = + using value_type = typename std::tuple_element<0, decltype(ValueIndexType())>::type; - using i_type = + using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; - using Vec = gko::matrix::Dense; - using Csr = gko::matrix::Csr; + using Vec = gko::matrix::Dense; Permutation() : exec(gko::ReferenceExecutor::create()) {} @@ -67,413 +64,53 @@ TYPED_TEST_SUITE(Permutation, gko::test::ValueIndexTypes, PairTypenameNameGenerator); -TYPED_TEST(Permutation, AppliesRowPermutationToDense) +TYPED_TEST(Permutation, Invert) { - using i_type = typename TestFixture::i_type; - using T = typename TestFixture::v_type; - using Vec = typename TestFixture::Vec; - // clang-format off - auto x = gko::initialize( - {I{2.0, 3.0}, - I{4.0, 2.5}}, this->exec); - // clang-format on - auto y = Vec::create(this->exec, gko::dim<2>{2}); - i_type rdata[] = {1, 0}; + using index_type = typename TestFixture::index_type; + auto perm = gko::matrix::Permutation::create( + this->exec, 3, gko::array{this->exec, {1, 2, 0}}); - auto perm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{2}, gko::make_array_view(this->exec, 2, rdata)); + auto inv = perm->invert(); - perm->apply(x, y); - // clang-format off - GKO_ASSERT_MTX_NEAR(y, - l({{4.0, 2.5}, - {2.0, 3.0}}), - 0.0); - // clang-format on + EXPECT_EQ(inv->get_const_permutation()[0], 2); + EXPECT_EQ(inv->get_const_permutation()[1], 0); + EXPECT_EQ(inv->get_const_permutation()[2], 1); } -TYPED_TEST(Permutation, AppliesColPermutationToDense) +TYPED_TEST(Permutation, Write) { - using i_type = typename TestFixture::i_type; - using T = typename TestFixture::v_type; - using Vec = typename TestFixture::Vec; - // clang-format off - auto x = gko::initialize( - {I{2.0, 3.0}, - I{4.0, 2.5}}, this->exec); - // clang-format on - auto y = Vec::create(this->exec, gko::dim<2>{2}); - i_type rdata[] = {1, 0}; + using index_type = typename TestFixture::index_type; + auto perm = gko::matrix::Permutation::create( + this->exec, 3, gko::array{this->exec, {1, 2, 0}}); - auto perm = gko::matrix::Permutation::create( - this->exec, 
gko::dim<2>{2}, gko::make_array_view(this->exec, 2, rdata), - gko::matrix::column_permute); - - perm->apply(x, y); - // clang-format off - GKO_ASSERT_MTX_NEAR(y, - l({{3.0, 2.0}, - {2.5, 4.0}}), - 0.0); - // clang-format on + GKO_ASSERT_MTX_NEAR( + perm, l({{0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}, {1.0, 0.0, 0.0}}), + 0.0); } -TYPED_TEST(Permutation, AppliesRowAndColPermutationToDense) +TYPED_TEST(Permutation, AppliesRowPermutationToDense) { - using i_type = typename TestFixture::i_type; - using T = typename TestFixture::v_type; + using index_type = typename TestFixture::index_type; + using T = typename TestFixture::value_type; using Vec = typename TestFixture::Vec; // clang-format off auto x = gko::initialize( {I{2.0, 3.0}, I{4.0, 2.5}}, this->exec); // clang-format on - auto y1 = Vec::create(this->exec, gko::dim<2>{2}); - auto y2 = Vec::create(this->exec, gko::dim<2>{2}); - i_type cdata[] = {1, 0}; - i_type rdata[] = {1, 0}; + auto y = Vec::create(this->exec, gko::dim<2>{2}); + index_type rdata[] = {1, 0}; - auto rperm = gko::matrix::Permutation::create( + auto perm = gko::matrix::Permutation::create( this->exec, gko::dim<2>{2}, gko::make_array_view(this->exec, 2, rdata)); - auto cperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{2}, gko::make_array_view(this->exec, 2, cdata), - gko::matrix::column_permute); - - rperm->apply(x, y1); - cperm->apply(y1, y2); - // clang-format off - GKO_ASSERT_MTX_NEAR(y2, - l({{2.5, 4.0}, - {3.0, 2.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesRowAndColPermutationToDenseWithOneArray) -{ - using i_type = typename TestFixture::i_type; - using T = typename TestFixture::v_type; - using Vec = typename TestFixture::Vec; - // clang-format off - auto x = gko::initialize( - {I{2.0, 3.0}, - I{4.0, 2.5}}, this->exec); - // clang-format on - auto y1 = Vec::create(this->exec, gko::dim<2>{2}); - i_type data[] = {1, 0}; - - auto perm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{2}, gko::make_array_view(this->exec, 2, data), - gko::matrix::row_permute | gko::matrix::column_permute); - - perm->apply(x, y1); - // clang-format off - GKO_ASSERT_MTX_NEAR(y1, - l({{2.5, 4.0}, - {3.0, 2.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesInverseRowAndColPermutationToDense) -{ - using i_type = typename TestFixture::i_type; - using Vec = typename TestFixture::Vec; - // clang-format off - auto x = gko::initialize({{2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}, - {0.0, 4.0, 2.5}}, - this->exec); - // clang-format on - auto y1 = Vec::create(this->exec, gko::dim<2>{3}); - auto y2 = Vec::create(this->exec, gko::dim<2>{3}); - i_type cdata[] = {1, 2, 0}; - i_type rdata[] = {1, 2, 0}; - - auto rperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, rdata), - gko::matrix::row_permute | gko::matrix::inverse_permute); - auto cperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, cdata), - gko::matrix::inverse_permute | gko::matrix::column_permute); - - rperm->apply(x, y1); - cperm->apply(y1, y2); - // clang-format off - GKO_ASSERT_MTX_NEAR(y2, - l({{2.5, 0.0, 4.0}, - {0.0, 2.0, 3.0}, - {0.0, 0.0, 1.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesInverseRowAndColPermutationToDenseWithOneArray) -{ - using i_type = typename TestFixture::i_type; - using Vec = typename TestFixture::Vec; - // clang-format off - auto x = gko::initialize({{2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}, - {0.0, 4.0, 2.5}}, - 
this->exec); - // clang-format on - auto y1 = Vec::create(this->exec, gko::dim<2>{3}); - i_type data[] = {1, 2, 0}; - - auto perm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, data), - gko::matrix::column_permute | gko::matrix::row_permute | - gko::matrix::inverse_permute); - - perm->apply(x, y1); - // clang-format off - GKO_ASSERT_MTX_NEAR(y1, - l({{2.5, 0.0, 4.0}, - {0.0, 2.0, 3.0}, - {0.0, 0.0, 1.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesInverseRowPermutationToDense) -{ - using i_type = typename TestFixture::i_type; - using Vec = typename TestFixture::Vec; - // clang-format off - auto x = gko::initialize({{2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}, - {0.0, 4.0, 2.5}}, - this->exec); - // clang-format on - auto y = Vec::create(this->exec, gko::dim<2>{3}); - i_type rdata[] = {1, 2, 0}; - - auto rperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, rdata), - gko::matrix::row_permute | gko::matrix::inverse_permute); - - rperm->apply(x, y); - // clang-format off - GKO_ASSERT_MTX_NEAR(y, - l({{0.0, 4.0, 2.5}, - {2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesInverseColPermutationToDense) -{ - using i_type = typename TestFixture::i_type; - using Vec = typename TestFixture::Vec; - // clang-format off - auto x = gko::initialize({{2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}, - {0.0, 4.0, 2.5}}, - this->exec); - // clang-format on - auto y = Vec::create(this->exec, gko::dim<2>{3}); - i_type cdata[] = {1, 2, 0}; - - auto cperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, cdata), - gko::matrix::inverse_permute | gko::matrix::column_permute); - - cperm->apply(x, y); - // clang-format off - GKO_ASSERT_MTX_NEAR(y, - l({{0.0, 2.0, 3.0}, - {0.0, 0.0, 1.0}, - {2.5, 0.0, 4.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesRowPermutationToCsr) -{ - using i_type = typename TestFixture::i_type; - using Csr = typename TestFixture::Csr; - // clang-format off - auto x = gko::initialize( - {{2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}, - {0.0, 4.0, 2.5}}, - this->exec); - // clang-format on - auto y = Csr::create(this->exec, gko::dim<2>{3}); - i_type rdata[] = {1, 2, 0}; - - auto perm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, rdata)); perm->apply(x, y); // clang-format off GKO_ASSERT_MTX_NEAR(y, - l({{0.0, 1.0, 0.0}, - {0.0, 4.0, 2.5}, - {2.0, 3.0, 0.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesColPermutationToCsr) -{ - using i_type = typename TestFixture::i_type; - using Csr = typename TestFixture::Csr; - // clang-format off - auto x = gko::initialize( - {{2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}, - {0.0, 4.0, 2.5}}, - this->exec); - // clang-format on - auto y = Csr::create(this->exec, gko::dim<2>{3}); - i_type cdata[] = {1, 2, 0}; - - auto perm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, cdata), - gko::matrix::column_permute); - - perm->apply(x, y); - // clang-format off - GKO_ASSERT_MTX_NEAR(y, - l({{3.0, 0.0, 2.0}, - {1.0, 0.0, 0.0}, - {4.0, 2.5, 0.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesRowAndColPermutationToCsr) -{ - using i_type = typename TestFixture::i_type; - using Csr = typename TestFixture::Csr; - // clang-format off - auto x = gko::initialize( - {{2.0, 3.0, 0.0}, - {0.0, 1.0, 
0.0}, - {0.0, 4.0, 2.5}}, - this->exec); - // clang-format on - auto y1 = Csr::create(this->exec, gko::dim<2>{3}); - auto y2 = Csr::create(this->exec, gko::dim<2>{3}); - i_type cdata[] = {1, 2, 0}; - i_type rdata[] = {1, 2, 0}; - - auto rperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, rdata)); - auto cperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, cdata), - gko::matrix::column_permute); - - rperm->apply(x, y1); - cperm->apply(y1, y2); - // clang-format off - GKO_ASSERT_MTX_NEAR(y2, - l({{1.0, 0.0, 0.0}, - {4.0, 2.5, 0.0}, - {3.0, 0.0, 2.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesInverseRowPermutationToCsr) -{ - using i_type = typename TestFixture::i_type; - using Csr = typename TestFixture::Csr; - // clang-format off - auto x = gko::initialize({{2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}, - {0.0, 4.0, 2.5}}, - this->exec); - // clang-format on - auto y = Csr::create(this->exec, gko::dim<2>{3}); - i_type rdata[] = {1, 2, 0}; - - auto rperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, rdata), - gko::matrix::row_permute | gko::matrix::inverse_permute); - - rperm->apply(x, y); - // clang-format off - GKO_ASSERT_MTX_NEAR(y, - l({{0.0, 4.0, 2.5}, - {2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesInverseColPermutationToCsr) -{ - using i_type = typename TestFixture::i_type; - using Csr = typename TestFixture::Csr; - // clang-format off - auto x = gko::initialize({{2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}, - {0.0, 4.0, 2.5}}, - this->exec); - // clang-format on - auto y = Csr::create(this->exec, gko::dim<2>{3}); - i_type cdata[] = {1, 2, 0}; - - auto cperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, cdata), - gko::matrix::inverse_permute | gko::matrix::column_permute); - - cperm->apply(x, y); - // clang-format off - GKO_ASSERT_MTX_NEAR(y, - l({{0.0, 2.0, 3.0}, - {0.0, 0.0, 1.0}, - {2.5, 0.0, 4.0}}), - 0.0); - // clang-format on -} - - -TYPED_TEST(Permutation, AppliesInverseRowAndColPermutationToCsr) -{ - using i_type = typename TestFixture::i_type; - using Csr = typename TestFixture::Csr; - // clang-format off - auto x = gko::initialize({{2.0, 3.0, 0.0}, - {0.0, 1.0, 0.0}, - {0.0, 4.0, 2.5}}, - this->exec); - // clang-format on - auto y1 = Csr::create(this->exec, gko::dim<2>{3}); - auto y2 = Csr::create(this->exec, gko::dim<2>{3}); - i_type cdata[] = {1, 2, 0}; - i_type rdata[] = {1, 2, 0}; - - auto rperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, rdata), - gko::matrix::row_permute | gko::matrix::inverse_permute); - auto cperm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3}, gko::make_array_view(this->exec, 3, cdata), - gko::matrix::inverse_permute | gko::matrix::column_permute); - - rperm->apply(x, y1); - cperm->apply(y1, y2); - // clang-format off - GKO_ASSERT_MTX_NEAR(y2, - l({{2.5, 0.0, 4.0}, - {0.0, 2.0, 3.0}, - {0.0, 0.0, 1.0}}), + l({{4.0, 2.5}, + {2.0, 3.0}}), 0.0); // clang-format on } diff --git a/reference/test/matrix/scaled_permutation.cpp b/reference/test/matrix/scaled_permutation.cpp new file mode 100644 index 00000000000..a15c0f09bbf --- /dev/null +++ b/reference/test/matrix/scaled_permutation.cpp @@ -0,0 +1,116 @@ +/************************************************************* +Copyright (c) 2017-2023, the 
Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include
+
+
+#include
+
+
+#include
+#include
+#include
+
+
+#include "core/test/utils.hpp"
+
+
+namespace {
+
+
+template <typename ValueIndexType>
+class ScaledPermutation : public ::testing::Test {
+protected:
+    using value_type =
+        typename std::tuple_element<0, decltype(ValueIndexType())>::type;
+    using index_type =
+        typename std::tuple_element<1, decltype(ValueIndexType())>::type;
+    using Vec = gko::matrix::Dense<value_type>;
+    using Mtx = gko::matrix::ScaledPermutation<value_type, index_type>;
+
+    ScaledPermutation() : exec(gko::ReferenceExecutor::create())
+    {
+        perm3 = Mtx::create(
+            exec, gko::array<value_type>{this->exec, {1.0, 2.0, 4.0}},
+            gko::array<index_type>{this->exec, {1, 2, 0}});
+        perm2 =
+            Mtx::create(exec, gko::array<value_type>{this->exec, {3.0, 5.0}},
+                        gko::array<index_type>{this->exec, {1, 0}});
+    }
+
+    std::shared_ptr<const gko::ReferenceExecutor> exec;
+    std::unique_ptr<Mtx> perm3;
+    std::unique_ptr<Mtx> perm2;
+};
+
+TYPED_TEST_SUITE(ScaledPermutation, gko::test::ValueIndexTypes,
+                 PairTypenameNameGenerator);
+
+
+TYPED_TEST(ScaledPermutation, Invert)
+{
+    using T = typename TestFixture::value_type;
+    auto inv = this->perm3->invert();
+
+    EXPECT_EQ(inv->get_const_permutation()[0], 2);
+    EXPECT_EQ(inv->get_const_permutation()[1], 0);
+    EXPECT_EQ(inv->get_const_permutation()[2], 1);
+    EXPECT_EQ(inv->get_const_scale()[0], T{0.25});
+    EXPECT_EQ(inv->get_const_scale()[1], T{1.0});
+    EXPECT_EQ(inv->get_const_scale()[2], T{0.5});
+}
+
+
+TYPED_TEST(ScaledPermutation, Write)
+{
+    using T = typename TestFixture::value_type;
+
+    GKO_ASSERT_MTX_NEAR(
+        this->perm3, l({{0.0, 1.0, 0.0}, {0.0, 0.0, 2.0}, {4.0, 0.0, 0.0}}),
+        0.0);
+}
+
+
+TYPED_TEST(ScaledPermutation, AppliesToDense)
+{
+    using T = typename TestFixture::value_type;
+    using Vec = typename TestFixture::Vec;
+    auto x = gko::initialize<Vec>({I<T>{2.0, 3.0}, I<T>{4.0, 2.5}}, this->exec);
+    auto y = Vec::create(this->exec, gko::dim<2>{2});
+
+    this->perm2->apply(x, y);
+
+    GKO_ASSERT_MTX_NEAR(y, l({{12.0, 7.5}, {10.0, 15.0}}), 0.0);
+}
+
+
+}  // namespace
diff --git a/reference/test/reorder/rcm_kernels.cpp
b/reference/test/reorder/rcm_kernels.cpp index 4c79af9e73a..b23e8bec097 100644 --- a/reference/test/reorder/rcm_kernels.cpp +++ b/reference/test/reorder/rcm_kernels.cpp @@ -98,7 +98,7 @@ class Rcm : public ::testing::Test { static bool is_permutation(const perm_type* input_perm) { - const auto perm_size = input_perm->get_permutation_size(); + const auto perm_size = input_perm->get_size()[0]; auto perm_sorted = std::vector(perm_size); std::copy_n(input_perm->get_const_permutation(), perm_size, perm_sorted.begin()); diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index a03a0a0bb4e..d49373811dc 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -14,5 +14,7 @@ else() endif() ginkgo_create_common_test(hybrid_kernels) ginkgo_create_common_test(matrix) +ginkgo_create_common_test(permutation_kernels) +ginkgo_create_common_test(scaled_permutation_kernels) ginkgo_create_common_test(sellp_kernels) ginkgo_create_common_test(sparsity_csr_kernels) diff --git a/test/matrix/csr_kernels2.cpp b/test/matrix/csr_kernels2.cpp index 84b1335c675..9e8355c284d 100644 --- a/test/matrix/csr_kernels2.cpp +++ b/test/matrix/csr_kernels2.cpp @@ -48,6 +48,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include +#include #include #include @@ -55,6 +57,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/test/utils.hpp" +#include "core/test/utils/assertions.hpp" #include "core/test/utils/unsort_matrix.hpp" #include "core/utils/matrix_utils.hpp" #include "test/utils/executor.hpp" @@ -68,6 +71,8 @@ class Csr : public CommonTestFixture { using Mtx = gko::matrix::Csr; using ComplexVec = gko::matrix::Dense>; using ComplexMtx = gko::matrix::Csr>; + using Perm = gko::matrix::Permutation; + using ScaledPerm = gko::matrix::ScaledPermutation; Csr() #ifdef GINKGO_FAST_TESTS @@ -162,8 +167,8 @@ class Csr : public CommonTestFixture { beta2 = gko::initialize({-1.0}, ref); dmtx = Mtx::create(exec, strategy); dmtx->copy_from(mtx); - square_dmtx = Mtx::create(exec, strategy); - square_dmtx->copy_from(square_mtx); + dsquare_mtx = Mtx::create(exec, strategy); + dsquare_mtx->copy_from(square_mtx); dresult = gko::clone(exec, expected); dresult2 = gko::clone(exec, expected2); dy = gko::clone(exec, y); @@ -180,8 +185,22 @@ class Csr : public CommonTestFixture { std::vector tmp2(mtx->get_size()[1], 0); std::iota(tmp2.begin(), tmp2.end(), 0); std::shuffle(tmp2.begin(), tmp2.end(), rng); + std::vector scale(mtx->get_size()[0]); + std::vector scale2(mtx->get_size()[1]); + std::uniform_real_distribution dist(1, 2); + auto gen = [&] { return dist(rng); }; + std::generate(scale.begin(), scale.end(), gen); + std::generate(scale2.begin(), scale2.end(), gen); rpermute_idxs = std::make_unique(ref, tmp.begin(), tmp.end()); cpermute_idxs = std::make_unique(ref, tmp2.begin(), tmp2.end()); + rpermutation = Perm::create(ref, tmp.size(), *rpermute_idxs); + cpermutation = Perm::create(ref, tmp2.size(), *cpermute_idxs); + srpermutation = ScaledPerm::create( + ref, gko::array(ref, scale.begin(), scale.end()), + *rpermute_idxs); + scpermutation = ScaledPerm::create( + ref, gko::array(ref, scale2.begin(), scale2.end()), + *cpermute_idxs); } template @@ -192,8 +211,8 @@ class Csr : public CommonTestFixture { complex_mtx = ComplexMtx::create(ref, strategy); complex_mtx->move_from( gen_mtx(mtx_size[0], mtx_size[1], 1)); - complex_dmtx = 
ComplexMtx::create(exec, strategy); - complex_dmtx->copy_from(complex_mtx); + dcomplex_mtx = ComplexMtx::create(exec, strategy); + dcomplex_mtx->copy_from(complex_mtx); } void unsort_mtx() @@ -220,8 +239,8 @@ class Csr : public CommonTestFixture { std::unique_ptr dmtx; std::unique_ptr dmtx2; - std::unique_ptr complex_dmtx; - std::unique_ptr square_dmtx; + std::unique_ptr dcomplex_mtx; + std::unique_ptr dsquare_mtx; std::unique_ptr dresult; std::unique_ptr dresult2; std::unique_ptr dy; @@ -232,6 +251,10 @@ class Csr : public CommonTestFixture { std::unique_ptr dbeta2; std::unique_ptr rpermute_idxs; std::unique_ptr cpermute_idxs; + std::unique_ptr rpermutation; + std::unique_ptr cpermutation; + std::unique_ptr srpermutation; + std::unique_ptr scpermutation; }; @@ -510,11 +533,11 @@ TEST_F(Csr, AdvancedApplyToCsrMatrixIsEquivalentToRef) auto d_trans = dmtx->transpose(); mtx->apply(alpha, trans, beta, square_mtx); - dmtx->apply(dalpha, d_trans, dbeta, square_dmtx); + dmtx->apply(dalpha, d_trans, dbeta, dsquare_mtx); - GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, r::value); - GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); - ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(dsquare_mtx, square_mtx, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(dsquare_mtx, square_mtx); + ASSERT_TRUE(dsquare_mtx->is_sorted_by_column_index()); } @@ -525,11 +548,11 @@ TEST_F(Csr, SimpleApplyToCsrMatrixIsEquivalentToRef) auto d_trans = dmtx->transpose(); mtx->apply(trans, square_mtx); - dmtx->apply(d_trans, square_dmtx); + dmtx->apply(d_trans, dsquare_mtx); - GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, r::value); - GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); - ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); + GKO_ASSERT_MTX_NEAR(dsquare_mtx, square_mtx, r::value); + GKO_ASSERT_MTX_EQ_SPARSITY(dsquare_mtx, square_mtx); + ASSERT_TRUE(dsquare_mtx->is_sorted_by_column_index()); } @@ -542,11 +565,11 @@ TEST_F(Csr, SimpleApplyToSparseCsrMatrixIsEquivalentToRef) dmtx2->copy_from(mtx2); mtx->apply(mtx2, square_mtx); - dmtx->apply(dmtx2, square_dmtx); + dmtx->apply(dmtx2, dsquare_mtx); - GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); - GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, r::value); - ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(dsquare_mtx, square_mtx); + GKO_ASSERT_MTX_NEAR(dsquare_mtx, square_mtx, r::value); + ASSERT_TRUE(dsquare_mtx->is_sorted_by_column_index()); } @@ -560,11 +583,11 @@ TEST_F(Csr, SimpleApplySparseToSparseCsrMatrixIsEquivalentToRef) auto dmtx2 = gko::clone(exec, mtx2); mtx1->apply(mtx2, square_mtx); - dmtx1->apply(dmtx2, square_dmtx); + dmtx1->apply(dmtx2, dsquare_mtx); - GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); - GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, r::value); - ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(dsquare_mtx, square_mtx); + GKO_ASSERT_MTX_NEAR(dsquare_mtx, square_mtx, r::value); + ASSERT_TRUE(dsquare_mtx->is_sorted_by_column_index()); } @@ -581,11 +604,11 @@ TEST_F(Csr, SimpleApplyToEmptyCsrMatrixIsEquivalentToRef) dmtx2->copy_from(mtx2); mtx->apply(mtx2, square_mtx); - dmtx->apply(dmtx2, square_dmtx); + dmtx->apply(dmtx2, dsquare_mtx); - GKO_ASSERT_MTX_EQ_SPARSITY(square_dmtx, square_mtx); - GKO_ASSERT_MTX_NEAR(square_dmtx, square_mtx, r::value); - ASSERT_TRUE(square_dmtx->is_sorted_by_column_index()); + GKO_ASSERT_MTX_EQ_SPARSITY(dsquare_mtx, square_mtx); + GKO_ASSERT_MTX_NEAR(dsquare_mtx, square_mtx, r::value); + 
ASSERT_TRUE(dsquare_mtx->is_sorted_by_column_index()); } @@ -673,7 +696,7 @@ TEST_F(Csr, ConjugateTransposeIsEquivalentToRef) set_up_apply_complex_data(); auto trans = gko::as(complex_mtx->conj_transpose()); - auto d_trans = gko::as(complex_dmtx->conj_transpose()); + auto d_trans = gko::as(dcomplex_mtx->conj_transpose()); GKO_ASSERT_MTX_NEAR(d_trans, trans, 0.0); ASSERT_TRUE(d_trans->is_sorted_by_column_index()); @@ -868,12 +891,152 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) } +TEST_F(Csr, IsGenericPermutable) +{ + using gko::matrix::permute_mode; + set_up_apply_data(); + + for (auto mode : + {permute_mode::none, permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + auto permuted = square_mtx->permute(rpermutation, mode); + auto dpermuted = dsquare_mtx->permute(rpermutation, mode); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + ASSERT_TRUE(dpermuted->is_sorted_by_column_index()); + } +} + + +TEST_F(Csr, IsGenericPermutableRectangular) +{ + using gko::matrix::permute_mode; + set_up_apply_data(); + + auto rpermuted = mtx->permute(rpermutation, permute_mode::rows); + auto drpermuted = dmtx->permute(rpermutation, permute_mode::rows); + auto irpermuted = + mtx->permute(rpermutation, permute_mode::inverse_rows); + auto dirpermuted = + dmtx->permute(rpermutation, permute_mode::inverse_rows); + auto cpermuted = mtx->permute(cpermutation, permute_mode::columns); + auto dcpermuted = dmtx->permute(cpermutation, permute_mode::columns); + auto icpermuted = + mtx->permute(cpermutation, permute_mode::inverse_columns); + auto dicpermuted = + dmtx->permute(cpermutation, permute_mode::inverse_columns); + + GKO_EXPECT_MTX_NEAR(rpermuted, drpermuted, r::value); + GKO_EXPECT_MTX_NEAR(irpermuted, dirpermuted, r::value); + GKO_EXPECT_MTX_NEAR(cpermuted, dcpermuted, r::value); + GKO_EXPECT_MTX_NEAR(icpermuted, dicpermuted, r::value); + GKO_EXPECT_MTX_EQ_SPARSITY(rpermuted, drpermuted); + GKO_EXPECT_MTX_EQ_SPARSITY(irpermuted, dirpermuted); + GKO_EXPECT_MTX_EQ_SPARSITY(cpermuted, dcpermuted); + GKO_EXPECT_MTX_EQ_SPARSITY(icpermuted, dicpermuted); + EXPECT_TRUE(rpermuted->is_sorted_by_column_index()); + EXPECT_TRUE(irpermuted->is_sorted_by_column_index()); + EXPECT_TRUE(cpermuted->is_sorted_by_column_index()); + EXPECT_TRUE(icpermuted->is_sorted_by_column_index()); +} + + +TEST_F(Csr, IsNonsymmPermutable) +{ + using gko::matrix::permute_mode; + set_up_apply_data(); + + for (auto invert : {false, true}) { + SCOPED_TRACE(invert); + auto permuted = mtx->permute(rpermutation, cpermutation, invert); + auto dpermuted = dmtx->permute(rpermutation, cpermutation, invert); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + ASSERT_TRUE(dpermuted->is_sorted_by_column_index()); + } +} + + +TEST_F(Csr, IsGenericScalePermutable) +{ + using gko::matrix::permute_mode; + set_up_apply_data(); + + for (auto mode : + {permute_mode::none, permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + auto permuted = square_mtx->scale_permute(srpermutation, mode); + auto dpermuted = dsquare_mtx->scale_permute(srpermutation, mode); + + GKO_EXPECT_MTX_NEAR(permuted, dpermuted, r::value); + GKO_EXPECT_MTX_EQ_SPARSITY(permuted, dpermuted); + 
EXPECT_TRUE(dpermuted->is_sorted_by_column_index()); + } +} + + +TEST_F(Csr, IsGenericScalePermutableRectangular) +{ + using gko::matrix::permute_mode; + set_up_apply_data(); + + auto rpermuted = mtx->scale_permute(srpermutation, permute_mode::rows); + auto drpermuted = dmtx->scale_permute(srpermutation, permute_mode::rows); + auto irpermuted = + mtx->scale_permute(srpermutation, permute_mode::inverse_rows); + auto dirpermuted = + dmtx->scale_permute(srpermutation, permute_mode::inverse_rows); + auto cpermuted = mtx->scale_permute(scpermutation, permute_mode::columns); + auto dcpermuted = dmtx->scale_permute(scpermutation, permute_mode::columns); + auto icpermuted = + mtx->scale_permute(scpermutation, permute_mode::inverse_columns); + auto dicpermuted = + dmtx->scale_permute(scpermutation, permute_mode::inverse_columns); + + GKO_EXPECT_MTX_NEAR(rpermuted, drpermuted, r::value); + GKO_EXPECT_MTX_NEAR(irpermuted, dirpermuted, r::value); + GKO_EXPECT_MTX_NEAR(cpermuted, dcpermuted, r::value); + GKO_EXPECT_MTX_NEAR(icpermuted, dicpermuted, r::value); + GKO_EXPECT_MTX_EQ_SPARSITY(rpermuted, drpermuted); + GKO_EXPECT_MTX_EQ_SPARSITY(irpermuted, dirpermuted); + GKO_EXPECT_MTX_EQ_SPARSITY(cpermuted, dcpermuted); + GKO_EXPECT_MTX_EQ_SPARSITY(icpermuted, dicpermuted); + EXPECT_TRUE(rpermuted->is_sorted_by_column_index()); + EXPECT_TRUE(irpermuted->is_sorted_by_column_index()); + EXPECT_TRUE(cpermuted->is_sorted_by_column_index()); + EXPECT_TRUE(icpermuted->is_sorted_by_column_index()); +} + + +TEST_F(Csr, IsNonsymmScalePermutable) +{ + using gko::matrix::permute_mode; + set_up_apply_data(); + + for (auto invert : {false, true}) { + SCOPED_TRACE(invert); + auto permuted = mtx->scale_permute(srpermutation, scpermutation, invert); + auto dpermuted = dmtx->scale_permute(srpermutation, scpermutation, invert); + + GKO_EXPECT_MTX_NEAR(permuted, dpermuted, r::value); + GKO_EXPECT_MTX_EQ_SPARSITY(permuted, dpermuted); + EXPECT_TRUE(dpermuted->is_sorted_by_column_index()); + } +} + + TEST_F(Csr, IsPermutable) { set_up_apply_data(); auto permuted = gko::as(square_mtx->permute(rpermute_idxs.get())); - auto dpermuted = gko::as(square_dmtx->permute(rpermute_idxs.get())); + auto dpermuted = gko::as(dsquare_mtx->permute(rpermute_idxs.get())); GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); @@ -887,7 +1050,7 @@ TEST_F(Csr, IsInversePermutable) auto permuted = gko::as(square_mtx->inverse_permute(rpermute_idxs.get())); auto dpermuted = - gko::as(square_dmtx->inverse_permute(rpermute_idxs.get())); + gko::as(dsquare_mtx->inverse_permute(rpermute_idxs.get())); GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); @@ -1141,9 +1304,9 @@ TEST_F(Csr, InplaceAbsoluteComplexMatrixIsEquivalentToRef) set_up_apply_complex_data(); complex_mtx->compute_absolute_inplace(); - complex_dmtx->compute_absolute_inplace(); + dcomplex_mtx->compute_absolute_inplace(); - GKO_ASSERT_MTX_NEAR(complex_mtx, complex_dmtx, r::value); + GKO_ASSERT_MTX_NEAR(complex_mtx, dcomplex_mtx, r::value); } @@ -1152,7 +1315,7 @@ TEST_F(Csr, OutplaceAbsoluteComplexMatrixIsEquivalentToRef) set_up_apply_complex_data(); auto abs_mtx = complex_mtx->compute_absolute(); - auto dabs_mtx = complex_dmtx->compute_absolute(); + auto dabs_mtx = dcomplex_mtx->compute_absolute(); GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, r::value); } diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index e9449ee9262..994283915c2 100644 --- a/test/matrix/dense_kernels.cpp +++ 
b/test/matrix/dense_kernels.cpp @@ -50,6 +50,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include +#include #include #include @@ -70,6 +72,9 @@ class Dense : public CommonTestFixture { using ComplexMtx = gko::matrix::Dense>; using Diagonal = gko::matrix::Diagonal; using MixedComplexMtx = gko::matrix::Dense>; + using Permutation = gko::matrix::Permutation; + using ScaledPermutation = + gko::matrix::ScaledPermutation; Dense() : rand_engine(15) {} @@ -145,16 +150,37 @@ class Dense : public CommonTestFixture { std::iota(tmp2.begin(), tmp2.end(), 0); std::shuffle(tmp2.begin(), tmp2.end(), rng); std::vector tmp3(x->get_size()[0] / 10); + std::vector scale_factors(tmp.size()); + std::vector scale_factors2(tmp2.size()); std::uniform_int_distribution row_dist(0, x->get_size()[0] - 1); + std::uniform_real_distribution scale_dist{1, 2}; for (auto& i : tmp3) { i = row_dist(rng); } + for (auto& s : scale_factors) { + s = scale_dist(rng); + } + for (auto& s : scale_factors2) { + s = scale_dist(rng); + } rpermute_idxs = std::unique_ptr(new Arr{ref, tmp.begin(), tmp.end()}); cpermute_idxs = std::unique_ptr(new Arr{ref, tmp2.begin(), tmp2.end()}); rgather_idxs = std::unique_ptr(new Arr{ref, tmp3.begin(), tmp3.end()}); + rpermutation = Permutation::create(ref, tmp.size(), *rpermute_idxs); + cpermutation = Permutation::create(ref, tmp2.size(), *cpermute_idxs); + rspermutation = ScaledPermutation::create( + ref, + gko::array{ref, scale_factors.begin(), + scale_factors.end()}, + *rpermute_idxs); + cspermutation = ScaledPermutation::create( + ref, + gko::array{ref, scale_factors2.begin(), + scale_factors2.end()}, + *cpermute_idxs); } template @@ -187,6 +213,10 @@ class Dense : public CommonTestFixture { std::unique_ptr dsquare; std::unique_ptr rpermute_idxs; std::unique_ptr cpermute_idxs; + std::unique_ptr rpermutation; + std::unique_ptr cpermutation; + std::unique_ptr rspermutation; + std::unique_ptr cspermutation; std::unique_ptr rgather_idxs; }; @@ -1278,6 +1308,192 @@ TEST_F(Dense, CanAdvancedGatherRowsIntoMixedDenseCrossExecutor) } +TEST_F(Dense, IsGenericPermutable) +{ + using gko::matrix::permute_mode; + set_up_apply_data(); + + for (auto mode : + {permute_mode::none, permute_mode::rows, permute_mode::columns, + permute_mode::symmetric, permute_mode::inverse_rows, + permute_mode::inverse_columns, permute_mode::inverse_symmetric}) { + SCOPED_TRACE(mode); + auto permuted = square->permute(rpermutation, mode); + auto dpermuted = dsquare->permute(rpermutation, mode); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); + } +} + + +TEST_F(Dense, IsGenericPermutableRectangular) +{ + using gko::matrix::permute_mode; + set_up_apply_data(); + + auto rpermuted = x->permute(rpermutation, permute_mode::rows); + auto drpermuted = dx->permute(rpermutation, permute_mode::rows); + auto irpermuted = x->permute(rpermutation, permute_mode::inverse_rows); + auto dirpermuted = dx->permute(rpermutation, permute_mode::inverse_rows); + auto cpermuted = x->permute(cpermutation, permute_mode::columns); + auto dcpermuted = dx->permute(cpermutation, permute_mode::columns); + auto icpermuted = x->permute(cpermutation, permute_mode::inverse_columns); + auto dicpermuted = dx->permute(cpermutation, permute_mode::inverse_columns); + + GKO_ASSERT_MTX_NEAR(rpermuted, drpermuted, 0); + GKO_ASSERT_MTX_NEAR(irpermuted, dirpermuted, 0); + GKO_ASSERT_MTX_NEAR(cpermuted, dcpermuted, 0); + GKO_ASSERT_MTX_NEAR(icpermuted, dicpermuted, 0); +} + + +TEST_F(Dense, 
IsGenericPermutableIntoDenseCrossExecutor)
+{
+    using gko::matrix::permute_mode;
+    set_up_apply_data();
+
+    for (auto mode :
+         {permute_mode::none, permute_mode::rows, permute_mode::columns,
+          permute_mode::symmetric, permute_mode::inverse_rows,
+          permute_mode::inverse_columns, permute_mode::inverse_symmetric}) {
+        SCOPED_TRACE(mode);
+        auto host_permuted = square->clone();
+
+        auto ref_permuted = square->permute(rpermutation, mode);
+        dsquare->permute(rpermutation, host_permuted, mode);
+
+        GKO_ASSERT_MTX_NEAR(ref_permuted, host_permuted, 0);
+    }
+}
+
+
+TEST_F(Dense, IsNonsymmPermutable)
+{
+    using gko::matrix::permute_mode;
+    set_up_apply_data();
+
+    for (auto invert : {false, true}) {
+        SCOPED_TRACE(invert);
+        auto permuted = x->permute(rpermutation, cpermutation, invert);
+        auto dpermuted = dx->permute(rpermutation, cpermutation, invert);
+
+        GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0);
+    }
+}
+
+
+TEST_F(Dense, IsNonsymmPermutableIntoDenseCrossExecutor)
+{
+    using gko::matrix::permute_mode;
+    set_up_apply_data();
+
+    for (auto invert : {false, true}) {
+        SCOPED_TRACE(invert);
+        auto host_permuted = dx->clone();
+
+        auto ref_permuted = x->permute(rpermutation, cpermutation, invert);
+        dx->permute(rpermutation, cpermutation, host_permuted, invert);
+
+        GKO_ASSERT_MTX_NEAR(ref_permuted, host_permuted, 0);
+    }
+}
+
+
+TEST_F(Dense, IsGenericScalePermutable)
+{
+    using gko::matrix::permute_mode;
+    set_up_apply_data();
+
+    for (auto mode :
+         {permute_mode::none, permute_mode::rows, permute_mode::columns,
+          permute_mode::symmetric, permute_mode::inverse_rows,
+          permute_mode::inverse_columns, permute_mode::inverse_symmetric}) {
+        SCOPED_TRACE(mode);
+        auto permuted = square->scale_permute(rspermutation, mode);
+        auto dpermuted = dsquare->scale_permute(rspermutation, mode);
+
+        GKO_ASSERT_MTX_NEAR(permuted, dpermuted, r::value);
+    }
+}
+
+
+TEST_F(Dense, IsGenericScalePermutableRectangular)
+{
+    using gko::matrix::permute_mode;
+    set_up_apply_data();
+
+    auto rpermuted = x->scale_permute(rspermutation, permute_mode::rows);
+    auto drpermuted = dx->scale_permute(rspermutation, permute_mode::rows);
+    auto irpermuted =
+        x->scale_permute(rspermutation, permute_mode::inverse_rows);
+    auto dirpermuted =
+        dx->scale_permute(rspermutation, permute_mode::inverse_rows);
+    auto cpermuted = x->scale_permute(cspermutation, permute_mode::columns);
+    auto dcpermuted = dx->scale_permute(cspermutation, permute_mode::columns);
+    auto icpermuted =
+        x->scale_permute(cspermutation, permute_mode::inverse_columns);
+    auto dicpermuted =
+        dx->scale_permute(cspermutation, permute_mode::inverse_columns);
+
+    GKO_ASSERT_MTX_NEAR(rpermuted, drpermuted, r::value);
+    GKO_ASSERT_MTX_NEAR(irpermuted, dirpermuted, r::value);
+    GKO_ASSERT_MTX_NEAR(cpermuted, dcpermuted, r::value);
+    GKO_ASSERT_MTX_NEAR(icpermuted, dicpermuted, r::value);
+}
+
+
+TEST_F(Dense, IsGenericScalePermutableIntoDenseCrossExecutor)
+{
+    using gko::matrix::permute_mode;
+    set_up_apply_data();
+
+    for (auto mode :
+         {permute_mode::none, permute_mode::rows, permute_mode::columns,
+          permute_mode::symmetric, permute_mode::inverse_rows,
+          permute_mode::inverse_columns, permute_mode::inverse_symmetric}) {
+        SCOPED_TRACE(mode);
+        auto host_permuted = square->clone();
+
+        auto ref_permuted = square->scale_permute(rspermutation, mode);
+        dsquare->scale_permute(rspermutation, host_permuted, mode);
+
+        GKO_ASSERT_MTX_NEAR(ref_permuted, host_permuted, r::value);
+    }
+}
+
+
+TEST_F(Dense, IsNonsymmScalePermutable)
+{
+    using gko::matrix::permute_mode;
+    set_up_apply_data();
+
+    for (auto invert : {false, true}) {
+        SCOPED_TRACE(invert);
+        auto permuted = x->scale_permute(rspermutation, cspermutation, invert);
+        auto dpermuted = dx->scale_permute(rspermutation, cspermutation, invert);
+
+        GKO_ASSERT_MTX_NEAR(permuted, dpermuted, r::value);
+    }
+}
+
+
+TEST_F(Dense, IsNonsymmScalePermutableIntoDenseCrossExecutor)
+{
+    using gko::matrix::permute_mode;
+    set_up_apply_data();
+
+    for (auto invert : {false, true}) {
+        SCOPED_TRACE(invert);
+        auto host_permuted = dx->clone();
+
+        auto ref_permuted =
+            x->scale_permute(rspermutation, cspermutation, invert);
+        dx->scale_permute(rspermutation, cspermutation, host_permuted, invert);
+
+        GKO_ASSERT_MTX_NEAR(ref_permuted, host_permuted, r::value);
+    }
+}
+
+
 TEST_F(Dense, IsPermutable)
 {
     set_up_apply_data();
diff --git a/test/matrix/permutation_kernels.cpp b/test/matrix/permutation_kernels.cpp
new file mode 100644
index 00000000000..037040b8fd4
--- /dev/null
+++ b/test/matrix/permutation_kernels.cpp
@@ -0,0 +1,73 @@
+/*************************************************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include "core/test/utils.hpp" +#include "test/utils/executor.hpp" + + +class Permutation : public CommonTestFixture { +protected: + using Perm = gko::matrix::Permutation; + + Permutation() : rand_engine(42) + { + std::vector tmp(1000, 0); + std::iota(tmp.begin(), tmp.end(), 0); + std::shuffle(tmp.begin(), tmp.end(), rand_engine); + permutation = Perm::create(ref, tmp.size(), gko::array(ref, tmp.begin(), tmp.end())); + dpermutation = permutation->clone(exec); + } + + std::default_random_engine rand_engine; + + std::unique_ptr permutation; + std::unique_ptr dpermutation; +}; + + +TEST_F(Permutation, InvertIsEquivalentToRef) +{ + auto inv = permutation->invert(); + auto dinv = dpermutation->invert(); + + GKO_ASSERT_MTX_EQ_SPARSITY(inv, dinv); +} diff --git a/test/matrix/scaled_permutation_kernels.cpp b/test/matrix/scaled_permutation_kernels.cpp new file mode 100644 index 00000000000..d85b9735abc --- /dev/null +++ b/test/matrix/scaled_permutation_kernels.cpp @@ -0,0 +1,77 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include "core/test/utils.hpp" +#include "test/utils/executor.hpp" + + +class ScaledPermutation : public CommonTestFixture { +protected: + using ScaledPerm = gko::matrix::ScaledPermutation; + + ScaledPermutation() : rand_engine(42) + { + std::vector tmp(1000, 0); + std::iota(tmp.begin(), tmp.end(), 0); + std::shuffle(tmp.begin(), tmp.end(), rand_engine); + std::vector scale(tmp.size()); + std::uniform_real_distribution dist(1, 2); + auto gen = [&] { return dist(rand_engine); }; + std::generate(scale.begin(), scale.end(), gen); + permutation = ScaledPerm::create(ref, gko::array(ref, scale.begin(), scale.end()), gko::array(ref, tmp.begin(), tmp.end())); + dpermutation = permutation->clone(exec); + } + + std::default_random_engine rand_engine; + + std::unique_ptr permutation; + std::unique_ptr dpermutation; +}; + + +TEST_F(ScaledPermutation, InvertIsEquivalentToRef) +{ + auto inv = permutation->invert(); + auto dinv = dpermutation->invert(); + + GKO_ASSERT_MTX_NEAR(inv, dinv, r::value); +} From 17da78cb47c51b617252944a90177cbbe22401c7 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 9 Oct 2023 16:17:52 +0200 Subject: [PATCH 435/583] improve permutation interface consistency - remove permute_mask - deprecate permute_mask and dim<2> parameters --- core/matrix/csr.cpp | 1 - core/matrix/dense.cpp | 1 - core/matrix/permutation.cpp | 157 ++++++++++++++++++++- core/matrix/scaled_permutation.cpp | 5 +- core/reorder/amd.cpp | 3 +- core/test/matrix/permutation.cpp | 130 +---------------- include/ginkgo/core/matrix/permutation.hpp | 154 ++++++-------------- include/ginkgo/core/reorder/rcm.hpp | 16 +-- reference/test/matrix/csr_kernels.cpp | 4 +- reference/test/matrix/permutation.cpp | 4 +- 10 files changed, 221 insertions(+), 254 deletions(-) diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index e669f4d4718..b99becadccc 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -736,7 +736,6 @@ std::unique_ptr> create_permutation_view( const array& indices) { return Permutation::create_const(indices.get_executor(), - indices.get_num_elems(), indices.as_const_view()); } diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index b263357dc9b..05b5672117b 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -1438,7 +1438,6 @@ std::unique_ptr> create_permutation_view( const array& indices) { return Permutation::create_const(indices.get_executor(), - indices.get_num_elems(), indices.as_const_view()); } diff --git a/core/matrix/permutation.cpp b/core/matrix/permutation.cpp index cc58ced53d2..779bdd964bb 100644 --- a/core/matrix/permutation.cpp +++ b/core/matrix/permutation.cpp @@ -31,8 +31,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ #include +#include "core/base/dispatch_helper.hpp" #include "core/matrix/permutation_kernels.hpp" +#include "ginkgo/core/base/exception_helpers.hpp" #include "ginkgo/core/base/executor.hpp" +#include "ginkgo/core/base/precision_dispatch.hpp" +#include "ginkgo/core/base/utils_helper.hpp" namespace gko { @@ -46,6 +50,113 @@ GKO_REGISTER_OPERATION(invert, permutation::invert); } +template +std::unique_ptr> +Permutation::create_const( + std::shared_ptr exec, size_type size, + gko::detail::const_array_view&& perm_idxs, + mask_type enabled_permute) +{ + GKO_ASSERT_EQ(enabled_permute, row_permute); + GKO_ASSERT_EQ(size, perm_idxs.get_num_elems()); + return create_const(std::move(exec), std::move(perm_idxs)); +} + + +template +std::unique_ptr> +Permutation::create_const( + std::shared_ptr exec, + gko::detail::const_array_view&& perm_idxs) +{ + // cast const-ness away, but return a const object afterwards, + // so we can ensure that no modifications take place. + return std::unique_ptr>( + new Permutation{ + exec, gko::detail::array_const_cast(std::move(perm_idxs))}); +} + + +template +Permutation::Permutation(std::shared_ptr exec, + size_type size) + : EnableLinOp(exec, size), permutation_{exec, size} +{} + + +template +Permutation::Permutation(std::shared_ptr exec, + array permutation_indices) + : EnableLinOp(exec, permutation_indices.get_num_elems()), + permutation_{exec, std::move(permutation_indices)} +{} + + +template +Permutation::Permutation(std::shared_ptr exec, + const dim<2>& size) + : Permutation{exec, size[0]} +{ + GKO_ASSERT_IS_SQUARE_MATRIX(size); +} + + +template +Permutation::Permutation(std::shared_ptr exec, + const dim<2>& size, + const mask_type& enabled_permute) + : Permutation{exec, size[0]} +{ + GKO_ASSERT_EQ(enabled_permute, row_permute); + GKO_ASSERT_IS_SQUARE_MATRIX(size); +} + + +template +Permutation::Permutation(std::shared_ptr exec, + const dim<2>& size, + array permutation_indices) + : Permutation{std::move(exec), std::move(permutation_indices)} +{ + GKO_ASSERT_EQ(size[0], permutation_.get_num_elems()); + GKO_ASSERT_IS_SQUARE_MATRIX(size); +} + + +template +Permutation::Permutation(std::shared_ptr exec, + const dim<2>& size, + array permutation_indices, + const mask_type& enabled_permute) + : Permutation{std::move(exec), std::move(permutation_indices)} +{ + GKO_ASSERT_EQ(enabled_permute, row_permute); + GKO_ASSERT_EQ(size[0], permutation_.get_num_elems()); + GKO_ASSERT_IS_SQUARE_MATRIX(size); +} + + +template +size_type Permutation::get_permutation_size() const noexcept +{ + return permutation_.get_num_elems(); +} + + +template +mask_type Permutation::get_permute_mask() const +{ + return row_permute; +} + + +template +void Permutation::set_permute_mask(mask_type permute_mask) +{ + GKO_ASSERT_EQ(permute_mask, row_permute); +} + + template std::unique_ptr> Permutation::invert() const { @@ -54,8 +165,7 @@ std::unique_ptr> Permutation::invert() const array inv_permutation{exec, size}; exec->run(permutation::make_invert(this->get_const_permutation(), size, inv_permutation.get_data())); - return Permutation::create(exec, dim<2>{size, size}, - std::move(inv_permutation)); + return Permutation::create(exec, std::move(inv_permutation)); } @@ -70,11 +180,52 @@ void Permutation::write( data.nonzeros.reserve(data.size[0]); for (IndexType row = 0; row < this->get_size()[0]; row++) { data.nonzeros.emplace_back(row, host_this->get_const_permutation()[row], - 1.0); + one()); + } +} + + +template +void dispatch_dense(const 
LinOp* op, Functor fn) +{ + using matrix::Dense; + using std::complex; + if (dynamic_cast>*>(op)) { + run*, const Dense*>(op, fn); + } else if (dynamic_cast>>*>(op)) { + run>*, const Dense>*>(op, + fn); + } else { + GKO_NOT_SUPPORTED(*op); } } +template +void Permutation::apply_impl(const LinOp* in, LinOp* out) const +{ + dispatch_dense(in, [&](auto dense_in) { + auto dense_out = make_temporary_conversion< + typename gko::detail::pointee::value_type>(out); + dense_in->permute(this, dense_out.get(), permute_mode::rows); + }); +} + + +template +void Permutation::apply_impl(const LinOp* alpha, const LinOp* in, + const LinOp* beta, LinOp* out) const +{ + dispatch_dense(in, [&](auto dense_in) { + auto dense_out = make_temporary_conversion< + typename gko::detail::pointee::value_type>(out); + auto tmp = dense_in->permute(this, permute_mode::rows); + dense_out->scale(beta); + dense_out->add_scaled(alpha, tmp); + }); +} + + #define GKO_DECLARE_PERMUTATION_MATRIX(_type) class Permutation<_type> GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PERMUTATION_MATRIX); diff --git a/core/matrix/scaled_permutation.cpp b/core/matrix/scaled_permutation.cpp index d1ce00b521a..435b928a6b2 100644 --- a/core/matrix/scaled_permutation.cpp +++ b/core/matrix/scaled_permutation.cpp @@ -107,10 +107,9 @@ void ScaledPermutation::apply_impl(const LinOp* alpha, { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { - auto x_clone = dense_x->clone(); - dense_b->scale_permute(this, x_clone, permute_mode::rows); + auto tmp = dense_b->scale_permute(this, permute_mode::rows); dense_x->scale(dense_beta); - dense_x->add_scaled(dense_alpha, x_clone); + dense_x->add_scaled(dense_alpha, tmp); }, alpha, b, beta, x); } diff --git a/core/reorder/amd.cpp b/core/reorder/amd.cpp index fa955801c2b..a305a95293d 100644 --- a/core/reorder/amd.cpp +++ b/core/reorder/amd.cpp @@ -212,8 +212,7 @@ std::unique_ptr Amd::generate_impl( head, elen, degree, w)); // permutation gets copied to device via gko::array constructor - return permutation_type::create(exec, dim<2>{num_rows, num_rows}, - std::move(permutation)); + return permutation_type::create(exec, std::move(permutation)); } diff --git a/core/test/matrix/permutation.cpp b/core/test/matrix/permutation.cpp index 166ff0cbcdb..4879d1a8402 100644 --- a/core/test/matrix/permutation.cpp +++ b/core/test/matrix/permutation.cpp @@ -60,8 +60,7 @@ class Permutation : public ::testing::Test { Permutation() : exec(gko::ReferenceExecutor::create()), mtx(gko::matrix::Permutation::create( - exec, gko::dim<2>{4, 3}, - gko::array{exec, {1, 0, 2, 3}})) + exec, gko::array{exec, {1, 0, 2, 3}})) {} @@ -69,8 +68,7 @@ class Permutation : public ::testing::Test { gko::ptr_param> m) { auto perm = m->get_permutation(); - ASSERT_EQ(m->get_size(), gko::dim<2>(4, 3)); - ASSERT_EQ(m->get_size()[0], 4); + ASSERT_EQ(m->get_size(), gko::dim<2>(4, 4)); ASSERT_EQ(perm[0], 1); ASSERT_EQ(perm[1], 0); ASSERT_EQ(perm[2], 2); @@ -80,7 +78,6 @@ class Permutation : public ::testing::Test { static void assert_empty(gko::matrix::Permutation* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); - ASSERT_EQ(m->get_size()[0], 0); } std::shared_ptr exec; @@ -112,21 +109,9 @@ TYPED_TEST(Permutation, ReturnsNullValuesArrayWhenEmpty) TYPED_TEST(Permutation, CanBeConstructedWithSize) { using index_type = typename TestFixture::index_type; - auto m = gko::matrix::Permutation::create(this->exec, - gko::dim<2>{2, 3}); + auto m = gko::matrix::Permutation::create(this->exec, 2); - 
ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size()[0], 2); -} - - -TYPED_TEST(Permutation, FactorySetsCorrectPermuteMask) -{ - using index_type = typename TestFixture::index_type; - auto m = gko::matrix::Permutation::create(this->exec); - auto mask = m->get_permute_mask(); - - ASSERT_EQ(mask, gko::matrix::row_permute); + ASSERT_EQ(m->get_size(), gko::dim<2>(2, 2)); } @@ -136,8 +121,7 @@ TYPED_TEST(Permutation, PermutationCanBeConstructedFromExistingData) index_type data[] = {1, 0, 2}; auto m = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3, 5}, - gko::make_array_view(this->exec, 3, data)); + this->exec, gko::make_array_view(this->exec, 3, data)); ASSERT_EQ(m->get_const_permutation(), data); } @@ -150,88 +134,12 @@ TYPED_TEST(Permutation, PermutationCanBeConstructedFromExistingConstData) const index_type data[] = {1, 0, 2}; auto m = gko::matrix::Permutation::create_const( - this->exec, 3, gko::array::const_view(this->exec, 3, data)); + this->exec, gko::array::const_view(this->exec, 3, data)); ASSERT_EQ(m->get_const_permutation(), data); } -TYPED_TEST(Permutation, CanBeConstructedWithSizeAndMask) -{ - using index_type = typename TestFixture::index_type; - auto m = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{2, 3}, gko::matrix::column_permute); - - ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size()[0], 2); - ASSERT_EQ(m->get_permute_mask(), gko::matrix::column_permute); -} - - -TYPED_TEST(Permutation, CanExplicitlyOverrideSetPermuteMask) -{ - using index_type = typename TestFixture::index_type; - auto m = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{2, 3}, gko::matrix::column_permute); - - auto mask = m->get_permute_mask(); - ASSERT_EQ(mask, gko::matrix::column_permute); - - m->set_permute_mask(gko::matrix::row_permute | - gko::matrix::inverse_permute); - - auto s_mask = m->get_permute_mask(); - ASSERT_EQ(s_mask, gko::matrix::row_permute | gko::matrix::inverse_permute); -} - - -TYPED_TEST(Permutation, PermutationThrowsforWrongRowPermDimensions) -{ - using index_type = typename TestFixture::index_type; - index_type data[] = {0, 2, 1}; - - ASSERT_THROW(gko::matrix::Permutation::create( - this->exec, gko::dim<2>{4, 2}, - gko::make_array_view(this->exec, 3, data)), - gko::ValueMismatch); -} - - -TYPED_TEST(Permutation, SettingMaskDoesNotModifyData) -{ - using index_type = typename TestFixture::index_type; - index_type data[] = {1, 0, 2}; - - auto m = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3, 5}, - gko::make_array_view(this->exec, 3, data)); - - auto mask = m->get_permute_mask(); - ASSERT_EQ(m->get_const_permutation(), data); - ASSERT_EQ(mask, gko::matrix::row_permute); - - m->set_permute_mask(gko::matrix::row_permute | - gko::matrix::inverse_permute); - - auto s_mask = m->get_permute_mask(); - ASSERT_EQ(s_mask, gko::matrix::row_permute | gko::matrix::inverse_permute); - ASSERT_EQ(m->get_const_permutation(), data); -} - - -TYPED_TEST(Permutation, PermutationThrowsforWrongColPermDimensions) -{ - using index_type = typename TestFixture::index_type; - index_type data[] = {0, 2, 1}; - - ASSERT_THROW(gko::matrix::Permutation::create( - this->exec, gko::dim<2>{3, 4}, - gko::make_array_view(this->exec, 3, data), - gko::matrix::column_permute), - gko::ValueMismatch); -} - - TYPED_TEST(Permutation, KnowsItsSizeAndValues) { this->assert_equal_to_original_mtx(this->mtx); @@ -262,32 +170,6 @@ TYPED_TEST(Permutation, CanBeMoved) } -TYPED_TEST(Permutation, CopyingPreservesMask) -{ - using index_type = 
typename TestFixture::index_type; - auto mtx_copy = gko::matrix::Permutation::create(this->exec); - - mtx_copy->copy_from(this->mtx); - - auto o_mask = this->mtx->get_permute_mask(); - auto n_mask = mtx_copy->get_permute_mask(); - ASSERT_EQ(o_mask, gko::matrix::row_permute); - ASSERT_EQ(o_mask, n_mask); - - this->mtx->set_permute_mask(gko::matrix::column_permute); - - o_mask = this->mtx->get_permute_mask(); - n_mask = mtx_copy->get_permute_mask(); - ASSERT_EQ(o_mask, gko::matrix::column_permute); - ASSERT_NE(o_mask, n_mask); - - mtx_copy->copy_from(this->mtx); - - n_mask = mtx_copy->get_permute_mask(); - ASSERT_EQ(o_mask, n_mask); -} - - TYPED_TEST(Permutation, CanBeCloned) { auto mtx_clone = this->mtx->clone(); diff --git a/include/ginkgo/core/matrix/permutation.hpp b/include/ginkgo/core/matrix/permutation.hpp index b577481345b..abfffb11248 100644 --- a/include/ginkgo/core/matrix/permutation.hpp +++ b/include/ginkgo/core/matrix/permutation.hpp @@ -186,27 +186,13 @@ class Permutation : public EnableLinOp>, * array. */ [[deprecated("use get_size()[0] instead")]] size_type get_permutation_size() - const noexcept - { - return permutation_.get_num_elems(); - } + const noexcept; - /** - * Get the permute masks - * - * @return permute_mask the permute masks - */ - mask_type get_permute_mask() const { return enabled_permute_; } + [[deprecated("permute mask is no longer supported")]] mask_type + get_permute_mask() const; - /** - * Set the permute masks - * - * @param permute_mask the permute masks - */ - void set_permute_mask(mask_type permute_mask) - { - enabled_permute_ = permute_mask; - } + [[deprecated("permute mask is no longer supported")]] void set_permute_mask( + mask_type permute_mask); /** * Returns the inverse permutation. @@ -218,6 +204,23 @@ class Permutation : public EnableLinOp>, void write(gko::matrix_data& data) const override; + /** + * Creates a constant (immutable) Permutation matrix from a constant array. + * + * @param exec the executor to create the matrix on + * @param size the size of the square matrix + * @param perm_idxs the permutation index array of the matrix + * @param enabled_permute the mask describing the type of permutation + * @returns A smart pointer to the constant matrix wrapping the input array + * (if it resides on the same executor as the matrix) or a copy of + * the array on the correct executor. + */ + [[deprecated( + "use create_const without size and permute mask")]] static std:: + unique_ptr + create_const(std::shared_ptr exec, size_type size, + gko::detail::const_array_view&& perm_idxs, + mask_type enabled_permute = row_permute); /** * Creates a constant (immutable) Permutation matrix from a constant array. * @@ -230,16 +233,8 @@ class Permutation : public EnableLinOp>, * the array on the correct executor. */ static std::unique_ptr create_const( - std::shared_ptr exec, size_type size, - gko::detail::const_array_view&& perm_idxs, - mask_type enabled_permute = row_permute) - { - // cast const-ness away, but return a const object afterwards, - // so we can ensure that no modifications take place. 
- return std::unique_ptr(new Permutation{ - exec, size, gko::detail::array_const_cast(std::move(perm_idxs)), - enabled_permute}); - } + std::shared_ptr exec, + gko::detail::const_array_view&& perm_idxs); protected: /** @@ -247,32 +242,12 @@ class Permutation : public EnableLinOp>, * * @param exec Executor associated to the LinOp */ - Permutation(std::shared_ptr exec) - : Permutation(std::move(exec), dim<2>{}) - {} - - /** - * Creates uninitialized Permutation arrays of the specified size. - * - * @param exec Executor associated to the matrix - * @param size size of the permutable matrix - * @param enabled_permute mask for the type of permutation to apply. - */ - Permutation(std::shared_ptr exec, const dim<2>& size, - const mask_type& enabled_permute = row_permute) - : EnableLinOp(exec, size), - permutation_(exec, size[0]), - row_size_(size[0]), - col_size_(size[1]), - enabled_permute_(enabled_permute) - {} + Permutation(std::shared_ptr exec, size_type = 0); /** * Creates a Permutation matrix from an already allocated (and initialized) * row and column permutation arrays. * - * @tparam IndicesArray type of array of indices - * * @param exec Executor associated to the matrix * @param size size of the permutation array. * @param permutation_indices array of permutation array @@ -282,71 +257,34 @@ class Permutation : public EnableLinOp>, * IndexType, or is on the wrong executor, an internal copy will be created, * and the original array data will not be used in the matrix. */ - template - Permutation(std::shared_ptr exec, const dim<2>& size, - IndicesArray&& permutation_indices, - const mask_type& enabled_permute = row_permute) - : EnableLinOp(exec, size), - permutation_{exec, std::forward(permutation_indices)}, - row_size_(size[0]), - col_size_(size[1]), - enabled_permute_(enabled_permute) - { - if (enabled_permute_ & row_permute) { - GKO_ASSERT_EQ(size[0], permutation_.get_num_elems()); - } - if (enabled_permute_ & column_permute) { - GKO_ASSERT_EQ(size[1], permutation_.get_num_elems()); - } - } + Permutation(std::shared_ptr exec, + array permutation_indices); - void apply_impl(const LinOp* in, LinOp* out) const override - { - auto perm = as>(in); - std::unique_ptr tmp{}; - if (enabled_permute_ & inverse_permute) { - if (enabled_permute_ & row_permute) { - tmp = perm->inverse_row_permute(&permutation_); - } - if (enabled_permute_ & column_permute) { - if (enabled_permute_ & row_permute) { - tmp = as>(tmp.get()) - ->inverse_column_permute(&permutation_); - } else { - tmp = perm->inverse_column_permute(&permutation_); - } - } - } else { - if (enabled_permute_ & row_permute) { - tmp = perm->row_permute(&permutation_); - } - if (enabled_permute_ & column_permute) { - if (enabled_permute_ & row_permute) { - tmp = as>(tmp.get())->column_permute( - &permutation_); - } else { - tmp = perm->column_permute(&permutation_); - } - } - } - out->move_from(tmp); - } + [[deprecated( + "dim<2> is no longer supported as a dimension parameter, use size_type " + "instead")]] Permutation(std::shared_ptr exec, + const dim<2>& size); + [[deprecated("permute mask is no longer supported")]] Permutation( + std::shared_ptr exec, const dim<2>& size, + const mask_type& enabled_permute); - void apply_impl(const LinOp*, const LinOp* in, const LinOp*, - LinOp* out) const override - { - // Ignores alpha and beta and just performs a normal permutation as an - // advanced apply does not really make sense here. 
- this->apply_impl(in, out); - } + [[deprecated("use the overload without dimensions")]] Permutation( + std::shared_ptr exec, const dim<2>& size, + array permutation_indices); + + [[deprecated("permute mask is no longer supported")]] Permutation( + std::shared_ptr exec, const dim<2>& size, + array permutation_indices, + const mask_type& enabled_permute); + void apply_impl(const LinOp* in, LinOp* out) const override; + + void apply_impl(const LinOp*, const LinOp* in, const LinOp*, + LinOp* out) const override; private: array permutation_; - size_type row_size_; - size_type col_size_; - mask_type enabled_permute_; }; diff --git a/include/ginkgo/core/reorder/rcm.hpp b/include/ginkgo/core/reorder/rcm.hpp index 72ba6827f2b..ab0807194c5 100644 --- a/include/ginkgo/core/reorder/rcm.hpp +++ b/include/ginkgo/core/reorder/rcm.hpp @@ -177,6 +177,7 @@ class Rcm : public EnablePolymorphicObject, // The adjacency matrix has to be square. GKO_ASSERT_IS_SQUARE_MATRIX(args.system_matrix); + const auto num_rows = args.system_matrix->get_size()[0]; // This is needed because it does not make sense to call the copy and // convert if the existing matrix is empty. if (args.system_matrix->get_size()) { @@ -187,13 +188,12 @@ class Rcm : public EnablePolymorphicObject, adjacency_matrix = tmp->to_adjacency_matrix(); } - auto const dim = adjacency_matrix->get_size(); - permutation_ = PermutationMatrix::create(cpu_exec, dim); + permutation_ = PermutationMatrix::create(cpu_exec, num_rows); // To make it explicit. inv_permutation_ = nullptr; if (parameters_.construct_inverse_permutation) { - inv_permutation_ = PermutationMatrix::create(cpu_exec, dim); + inv_permutation_ = PermutationMatrix::create(cpu_exec, num_rows); } this->generate(cpu_exec, std::move(adjacency_matrix)); @@ -201,19 +201,19 @@ class Rcm : public EnablePolymorphicObject, // Copy back results to gpu if necessary. 
if (is_gpu_executor) { const auto gpu_exec = this->get_executor(); - auto gpu_perm = share(PermutationMatrix::create(gpu_exec, dim)); + auto gpu_perm = + share(PermutationMatrix::create(gpu_exec, num_rows)); gpu_perm->copy_from(permutation_); permutation_ = gpu_perm; if (inv_permutation_) { auto gpu_inv_perm = - share(PermutationMatrix::create(gpu_exec, dim)); + share(PermutationMatrix::create(gpu_exec, num_rows)); gpu_inv_perm->copy_from(inv_permutation_); inv_permutation_ = gpu_inv_perm; } } - auto permutation_array = - make_array_view(this->get_executor(), permutation_->get_size()[0], - permutation_->get_permutation()); + auto permutation_array = make_array_view( + this->get_executor(), num_rows, permutation_->get_permutation()); this->set_permutation_array(permutation_array); } diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index f388922f05d..d7b43ce9495 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -93,9 +93,9 @@ class Csr : public ::testing::Test { mtx3_unsorted( Mtx::create(exec, gko::dim<2>(3, 3), 7, std::make_shared())), - perm3(Perm::create(exec, 3, gko::array{exec, {1, 2, 0}})), + perm3(Perm::create(exec, gko::array{exec, {1, 2, 0}})), perm3_rev(perm3->invert()), - perm2(Perm::create(exec, 2, gko::array{exec, {1, 0}})), + perm2(Perm::create(exec, gko::array{exec, {1, 0}})), perm0(Perm::create(exec)), scale_perm3(ScaledPerm::create( exec, gko::array{this->exec, {2.0, 3.0, 5.0}}, diff --git a/reference/test/matrix/permutation.cpp b/reference/test/matrix/permutation.cpp index 65e092dfcd5..1301276a424 100644 --- a/reference/test/matrix/permutation.cpp +++ b/reference/test/matrix/permutation.cpp @@ -68,7 +68,7 @@ TYPED_TEST(Permutation, Invert) { using index_type = typename TestFixture::index_type; auto perm = gko::matrix::Permutation::create( - this->exec, 3, gko::array{this->exec, {1, 2, 0}}); + this->exec, gko::array{this->exec, {1, 2, 0}}); auto inv = perm->invert(); @@ -104,7 +104,7 @@ TYPED_TEST(Permutation, AppliesRowPermutationToDense) index_type rdata[] = {1, 0}; auto perm = gko::matrix::Permutation::create( - this->exec, gko::dim<2>{2}, gko::make_array_view(this->exec, 2, rdata)); + this->exec, gko::make_array_view(this->exec, 2, rdata)); perm->apply(x, y); // clang-format off From c70dbf723a971c4f73d32c44924206739c648e4d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 9 Oct 2023 17:26:49 +0200 Subject: [PATCH 436/583] swap order of scaling and permutation --- common/cuda_hip/matrix/csr_kernels.hpp.inc | 16 ++-- common/unified/matrix/csr_kernels.cpp | 6 +- .../unified/matrix/dense_kernels.template.cpp | 30 +++++--- .../matrix/scaled_permutation_kernels.cpp | 6 +- core/matrix/scaled_permutation.cpp | 4 +- .../ginkgo/core/matrix/scaled_permutation.hpp | 2 +- omp/matrix/csr_kernels.cpp | 10 +-- reference/matrix/csr_kernels.cpp | 16 ++-- reference/matrix/dense_kernels.cpp | 75 +++++++++++-------- .../matrix/scaled_permutation_kernels.cpp | 5 +- reference/test/matrix/scaled_permutation.cpp | 10 +-- 11 files changed, 101 insertions(+), 79 deletions(-) diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 3a762ad5ad1..757cd13e8d6 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -813,7 +813,7 @@ __global__ __launch_bounds__(default_block_size) void row_scale_permute( auto out_begin = out_row_ptrs[out_row]; for (IndexType i = lane; i < in_size; i += subwarp_size) { 
out_cols[out_begin + i] = in_cols[in_begin + i]; - out_vals[out_begin + i] = in_vals[in_begin + i] * scale[out_row]; + out_vals[out_begin + i] = in_vals[in_begin + i] * scale[in_row]; } } @@ -840,7 +840,7 @@ __global__ __launch_bounds__(default_block_size) void inv_row_scale_permute( auto out_begin = out_row_ptrs[out_row]; for (IndexType i = lane; i < in_size; i += subwarp_size) { out_cols[out_begin + i] = in_cols[in_begin + i]; - out_vals[out_begin + i] = in_vals[in_begin + i] / scale[in_row]; + out_vals[out_begin + i] = in_vals[in_begin + i] / scale[out_row]; } } @@ -866,10 +866,10 @@ __global__ __launch_bounds__(default_block_size) void inv_symm_scale_permute( auto in_size = in_row_ptrs[in_row + 1] - in_begin; auto out_begin = out_row_ptrs[out_row]; for (IndexType i = lane; i < in_size; i += subwarp_size) { - const auto in_col = in_cols[in_begin + i]; - out_cols[out_begin + i] = permutation[in_col]; + const auto out_col = permutation[in_cols[in_begin + i]]; + out_cols[out_begin + i] = out_col; out_vals[out_begin + i] = - in_vals[in_begin + i] / (scale[in_row] * scale[in_col]); + in_vals[in_begin + i] / (scale[out_row] * scale[out_col]); } } @@ -897,10 +897,10 @@ __global__ __launch_bounds__(default_block_size) void inv_nonsymm_scale_permute( auto in_size = in_row_ptrs[in_row + 1] - in_begin; auto out_begin = out_row_ptrs[out_row]; for (IndexType i = lane; i < in_size; i += subwarp_size) { - const auto in_col = in_cols[in_begin + i]; - out_cols[out_begin + i] = col_permutation[in_col]; + const auto out_col = col_permutation[in_cols[in_begin + i]]; + out_cols[out_begin + i] = out_col; out_vals[out_begin + i] = - in_vals[in_begin + i] / (row_scale[in_row] * col_scale[in_col]); + in_vals[in_begin + i] / (row_scale[out_row] * col_scale[out_col]); } } diff --git a/common/unified/matrix/csr_kernels.cpp b/common/unified/matrix/csr_kernels.cpp index 4746f88ddfe..10c8d8cd08e 100644 --- a/common/unified/matrix/csr_kernels.cpp +++ b/common/unified/matrix/csr_kernels.cpp @@ -103,9 +103,9 @@ void inv_col_scale_permute(std::shared_ptr exec, auto in_vals, auto out_row_ptrs, auto out_col_idxs, auto out_vals) { if (tid < num_nonzeros) { - const auto in_col = in_col_idxs[tid]; - out_col_idxs[tid] = permutation[in_col]; - out_vals[tid] = in_vals[tid] / scale[in_col]; + const auto out_col = permutation[in_col_idxs[tid]]; + out_col_idxs[tid] = out_col; + out_vals[tid] = in_vals[tid] / scale[out_col]; } if (tid <= num_rows) { out_row_ptrs[tid] = in_row_ptrs[tid]; diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index f3723ae8aad..c04f9c14d4c 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -539,7 +539,9 @@ void symm_scale_permute(std::shared_ptr exec, exec, [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, auto permuted) { - permuted(i, j) = scale[i] * scale[j] * orig(perm[i], perm[j]); + const auto row = perm[i]; + const auto col = perm[j]; + permuted(i, j) = scale[row] * scale[col] * orig(row, col); }, orig->get_size(), scale, perm, orig, permuted); } @@ -555,7 +557,9 @@ void inv_symm_scale_permute(std::shared_ptr exec, exec, [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, auto permuted) { - permuted(perm[i], perm[j]) = orig(i, j) / (scale[i] * scale[j]); + const auto row = perm[i]; + const auto col = perm[j]; + permuted(row, col) = orig(i, j) / (scale[row] * scale[col]); }, orig->get_size(), scale, perm, orig, permuted); } @@ -574,8 +578,9 @@ 
void nonsymm_scale_permute(std::shared_ptr exec, exec, [] GKO_KERNEL(auto i, auto j, auto row_scale, auto row_perm, auto col_scale, auto col_perm, auto orig, auto permuted) { - permuted(i, j) = - row_scale[i] * col_scale[j] * orig(row_perm[i], col_perm[j]); + const auto row = row_perm[i]; + const auto col = col_perm[j]; + permuted(i, j) = row_scale[row] * col_scale[col] * orig(row, col); }, orig->get_size(), row_scale, row_perm, col_scale, col_perm, orig, permuted); @@ -595,8 +600,9 @@ void inv_nonsymm_scale_permute(std::shared_ptr exec, exec, [] GKO_KERNEL(auto i, auto j, auto row_scale, auto row_perm, auto col_scale, auto col_perm, auto orig, auto permuted) { - permuted(row_perm[i], row_perm[j]) = - orig(i, j) / (row_scale[i] * col_scale[j]); + const auto row = row_perm[i]; + const auto col = col_perm[j]; + permuted(row, col) = orig(i, j) / (row_scale[i] * col_scale[j]); }, orig->get_size(), row_scale, row_perm, col_scale, col_perm, orig, permuted); @@ -613,7 +619,8 @@ void row_scale_permute(std::shared_ptr exec, exec, [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, auto permuted) { - permuted(i, j) = scale[i] * orig(perm[i], j); + const auto row = perm[i]; + permuted(i, j) = scale[row] * orig(row, j); }, orig->get_size(), scale, perm, orig, permuted); } @@ -629,7 +636,8 @@ void inv_row_scale_permute(std::shared_ptr exec, exec, [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, auto permuted) { - permuted(perm[i], j) = orig(i, j) / scale[i]; + const auto row = perm[i]; + permuted(row, j) = orig(i, j) / scale[row]; }, orig->get_size(), scale, perm, orig, permuted); } @@ -645,7 +653,8 @@ void col_scale_permute(std::shared_ptr exec, exec, [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, auto permuted) { - permuted(i, j) = scale[j] * orig(i, perm[j]); + const auto col = perm[j]; + permuted(i, j) = scale[col] * orig(i, col); }, orig->get_size(), scale, perm, orig, permuted); } @@ -661,7 +670,8 @@ void inv_col_scale_permute(std::shared_ptr exec, exec, [] GKO_KERNEL(auto i, auto j, auto scale, auto perm, auto orig, auto permuted) { - permuted(i, perm[j]) = orig(i, j) / scale[j]; + const auto col = perm[j]; + permuted(i, col) = orig(i, j) / scale[col]; }, orig->get_size(), scale, perm, orig, permuted); } diff --git a/common/unified/matrix/scaled_permutation_kernels.cpp b/common/unified/matrix/scaled_permutation_kernels.cpp index 7bebe4c4778..27a70e6c8ab 100644 --- a/common/unified/matrix/scaled_permutation_kernels.cpp +++ b/common/unified/matrix/scaled_permutation_kernels.cpp @@ -55,9 +55,9 @@ void invert(std::shared_ptr exec, exec, [] GKO_KERNEL(auto i, auto input_permutation, auto input_scale, auto output_permutation, auto output_scale) { - output_permutation[input_permutation[i]] = i; - output_scale[input_permutation[i]] = - one(input_scale[i]) / input_scale[i]; + const auto ip = input_permutation[i]; + output_permutation[ip] = i; + output_scale[i] = one(input_scale[ip]) / input_scale[ip]; }, size, input_permutation, input_scale, output_permutation, output_scale); } diff --git a/core/matrix/scaled_permutation.cpp b/core/matrix/scaled_permutation.cpp index 435b928a6b2..b6545ee68b4 100644 --- a/core/matrix/scaled_permutation.cpp +++ b/core/matrix/scaled_permutation.cpp @@ -125,8 +125,8 @@ void ScaledPermutation::write( data.nonzeros.clear(); data.nonzeros.reserve(data.size[0]); for (IndexType row = 0; row < this->get_size()[0]; row++) { - data.nonzeros.emplace_back(row, host_this->get_const_permutation()[row], - host_this->get_const_scale()[row]); + 
auto col = host_this->get_const_permutation()[row]; + data.nonzeros.emplace_back(row, col, host_this->get_const_scale()[col]); } } diff --git a/include/ginkgo/core/matrix/scaled_permutation.hpp b/include/ginkgo/core/matrix/scaled_permutation.hpp index 0a5a2d781e7..46d17ecbb75 100644 --- a/include/ginkgo/core/matrix/scaled_permutation.hpp +++ b/include/ginkgo/core/matrix/scaled_permutation.hpp @@ -51,7 +51,7 @@ namespace matrix { /** * ScaledPermutation is a matrix combining a permutation with scaling factors. * It is a combination of Diagonal and Permutation, and can be read as - * $SP = S \cdot P$, i.e. the scaling gets applied after the permutation. + * $SP = P \pdot S$, i.e. the scaling gets applied before the permutation. * * @tparam IndexType index type of permutation indices * @tparam ValueType value type of the scaling factors diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 29459a264c4..ca876d29199 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -1079,10 +1079,10 @@ void inv_nonsymm_scale_permute(std::shared_ptr exec, auto dst_begin = p_row_ptrs[dst_row]; auto row_size = in_row_ptrs[src_row + 1] - src_begin; for (IndexType i = 0; i < row_size; ++i) { - const auto in_col = in_col_idxs[src_begin + i]; - p_col_idxs[dst_begin + i] = col_perm[in_col]; + const auto out_col = col_perm[in_col_idxs[src_begin + i]]; + p_col_idxs[dst_begin + i] = out_col; p_vals[dst_begin + i] = in_vals[src_begin + i] / - (row_scale[src_row] * col_scale[in_col]); + (row_scale[dst_row] * col_scale[out_col]); } } } @@ -1123,7 +1123,7 @@ void row_scale_permute(std::shared_ptr exec, std::copy_n(orig_col_idxs + src_begin, row_size, rp_col_idxs + dst_begin); for (IndexType i = 0; i < row_size; i++) { - rp_vals[i + dst_begin] = orig_vals[i + src_begin] * scale[dst_row]; + rp_vals[i + dst_begin] = orig_vals[i + src_begin] * scale[src_row]; } } } @@ -1164,7 +1164,7 @@ void inv_row_scale_permute(std::shared_ptr exec, std::copy_n(orig_col_idxs + src_begin, row_size, rp_col_idxs + dst_begin); for (IndexType i = 0; i < row_size; i++) { - rp_vals[i + dst_begin] = orig_vals[i + src_begin] / scale[src_row]; + rp_vals[i + dst_begin] = orig_vals[i + src_begin] / scale[dst_row]; } } } diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index d87e72bc5ab..c45ad22177c 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -1028,10 +1028,10 @@ void inv_nonsymm_scale_permute(std::shared_ptr exec, auto dst_begin = p_row_ptrs[dst_row]; auto row_size = in_row_ptrs[src_row + 1] - src_begin; for (IndexType i = 0; i < row_size; ++i) { - const auto in_col = in_col_idxs[src_begin + i]; - p_col_idxs[dst_begin + i] = col_perm[in_col]; + const auto out_col = col_perm[in_col_idxs[src_begin + i]]; + p_col_idxs[dst_begin + i] = out_col; p_vals[dst_begin + i] = in_vals[src_begin + i] / - (row_scale[src_row] * col_scale[in_col]); + (row_scale[dst_row] * col_scale[out_col]); } } } @@ -1068,7 +1068,7 @@ void row_scale_permute(std::shared_ptr exec, const auto row_size = in_row_ptrs[src_row + 1] - src_begin; std::copy_n(in_col_idxs + src_begin, row_size, rp_col_idxs + dst_begin); for (IndexType i = 0; i < row_size; i++) { - rp_vals[i + dst_begin] = in_vals[i + src_begin] * scale[dst_row]; + rp_vals[i + dst_begin] = in_vals[i + src_begin] * scale[src_row]; } } } @@ -1105,7 +1105,7 @@ void inv_row_scale_permute(std::shared_ptr exec, auto row_size = in_row_ptrs[src_row + 1] - src_begin; std::copy_n(in_col_idxs + src_begin, row_size, 
rp_col_idxs + dst_begin); for (IndexType i = 0; i < row_size; i++) { - rp_vals[i + dst_begin] = in_vals[i + src_begin] / scale[src_row]; + rp_vals[i + dst_begin] = in_vals[i + src_begin] / scale[dst_row]; } } } @@ -1133,9 +1133,9 @@ void inv_col_scale_permute(std::shared_ptr exec, auto row_end = in_row_ptrs[row + 1]; cp_row_ptrs[row] = in_row_ptrs[row]; for (auto k = row_begin; k < row_end; ++k) { - const auto in_col = in_col_idxs[k]; - cp_col_idxs[k] = perm[in_col]; - cp_vals[k] = in_vals[k] / scale[in_col]; + const auto out_col = perm[in_col_idxs[k]]; + cp_col_idxs[k] = out_col; + cp_vals[k] = in_vals[k] / scale[out_col]; } } cp_row_ptrs[num_rows] = in_row_ptrs[num_rows]; diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 3b28336db11..8b35dcbe6af 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -975,8 +975,8 @@ void col_permute(std::shared_ptr exec, const IndexType* perm, const matrix::Dense* orig, matrix::Dense* col_permuted) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { col_permuted->at(i, j) = orig->at(i, perm[j]); } } @@ -1009,8 +1009,8 @@ void inv_col_permute(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* col_permuted) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { col_permuted->at(i, perm[j]) = orig->at(i, j); } } @@ -1026,10 +1026,11 @@ void symm_scale_permute(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* permuted) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - permuted->at(i, j) = - scale[i] * scale[j] * orig->at(perm[i], perm[j]); + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + const auto row = perm[i]; + const auto col = perm[j]; + permuted->at(i, j) = scale[row] * scale[col] * orig->at(row, col); } } } @@ -1044,10 +1045,11 @@ void inv_symm_scale_permute(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* permuted) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - permuted->at(perm[i], perm[j]) = - orig->at(i, j) / (scale[i] * scale[j]); + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + const auto row = perm[i]; + const auto col = perm[j]; + permuted->at(row, col) = orig->at(i, j) / (scale[row] * scale[col]); } } } @@ -1065,10 +1067,12 @@ void nonsymm_scale_permute(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* permuted) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - permuted->at(i, j) = row_scale[i] * col_scale[j] * - orig->at(row_perm[i], col_perm[j]); + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + const auto row = row_perm[i]; + const auto col = col_perm[j]; + permuted->at(i, j) = + row_scale[row] * col_scale[col] * orig->at(row, col); } } } @@ -1086,10 +1090,13 @@ void inv_nonsymm_scale_permute(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* permuted) { - for 
(size_type j = 0; j < orig->get_size()[1]; ++j) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - permuted->at(row_perm[i], col_perm[j]) = - orig->at(i, j) / (row_scale[i] * col_scale[j]); + // TODO this was broken in common, why did the test not pick it up? + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + const auto row = row_perm[i]; + const auto col = col_perm[j]; + permuted->at(row, col) = + orig->at(i, j) / (row_scale[row] * col_scale[col]); } } } @@ -1104,9 +1111,10 @@ void row_scale_permute(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* permuted) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - permuted->at(i, j) = scale[i] * orig->at(perm[i], j); + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + const auto row = perm[i]; + permuted->at(i, j) = scale[row] * orig->at(row, j); } } } @@ -1121,9 +1129,10 @@ void inv_row_scale_permute(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* permuted) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - permuted->at(perm[i], j) = orig->at(i, j) / scale[i]; + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + const auto row = perm[i]; + permuted->at(row, j) = orig->at(i, j) / scale[row]; } } } @@ -1138,9 +1147,10 @@ void col_scale_permute(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* permuted) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - permuted->at(i, j) = scale[j] * orig->at(i, perm[j]); + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + const auto col = perm[j]; + permuted->at(i, j) = scale[col] * orig->at(i, col); } } } @@ -1155,9 +1165,10 @@ void inv_col_scale_permute(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* permuted) { - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - for (size_type i = 0; i < orig->get_size()[0]; ++i) { - permuted->at(i, perm[j]) = orig->at(i, j) / scale[j]; + for (size_type i = 0; i < orig->get_size()[0]; ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + const auto col = perm[j]; + permuted->at(i, col) = orig->at(i, j) / scale[col]; } } } diff --git a/reference/matrix/scaled_permutation_kernels.cpp b/reference/matrix/scaled_permutation_kernels.cpp index 54a68fbdf0a..f0b83128c66 100644 --- a/reference/matrix/scaled_permutation_kernels.cpp +++ b/reference/matrix/scaled_permutation_kernels.cpp @@ -49,8 +49,9 @@ void invert(std::shared_ptr exec, ValueType* output_scale) { for (size_type i = 0; i < size; i++) { - output_permutation[input_permutation[i]] = i; - output_scale[input_permutation[i]] = one() / input_scale[i]; + const auto ip = input_permutation[i]; + output_permutation[ip] = i; + output_scale[i] = one() / input_scale[ip]; } } diff --git a/reference/test/matrix/scaled_permutation.cpp b/reference/test/matrix/scaled_permutation.cpp index a15c0f09bbf..d2968692761 100644 --- a/reference/test/matrix/scaled_permutation.cpp +++ b/reference/test/matrix/scaled_permutation.cpp @@ -84,9 +84,9 @@ TYPED_TEST(ScaledPermutation, Invert) EXPECT_EQ(inv->get_const_permutation()[0], 2); EXPECT_EQ(inv->get_const_permutation()[1], 0); EXPECT_EQ(inv->get_const_permutation()[2], 1); - 
EXPECT_EQ(inv->get_const_scale()[0], T{0.25}); - EXPECT_EQ(inv->get_const_scale()[1], T{1.0}); - EXPECT_EQ(inv->get_const_scale()[2], T{0.5}); + EXPECT_EQ(inv->get_const_scale()[0], T{0.5}); + EXPECT_EQ(inv->get_const_scale()[1], T{0.25}); + EXPECT_EQ(inv->get_const_scale()[2], T{1.0}); } @@ -95,7 +95,7 @@ TYPED_TEST(ScaledPermutation, Write) using T = typename TestFixture::value_type; GKO_ASSERT_MTX_NEAR( - this->perm3, l({{0.0, 1.0, 0.0}, {0.0, 0.0, 2.0}, {4.0, 0.0, 0.0}}), + this->perm3, l({{0.0, 2.0, 0.0}, {0.0, 0.0, 4.0}, {1.0, 0.0, 0.0}}), 0.0); } @@ -109,7 +109,7 @@ TYPED_TEST(ScaledPermutation, AppliesToDense) this->perm2->apply(x, y); - GKO_ASSERT_MTX_NEAR(y, l({{12.0, 7.5}, {10.0, 15.0}}), 0.0); + GKO_ASSERT_MTX_NEAR(y, l({{20.0, 12.5}, {6.0, 9.0}}), 0.0); } From 270ba1734a8c07a140aee28c918fe79d7e369a10 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 12 Oct 2023 15:06:49 +0200 Subject: [PATCH 437/583] add SYCL kernels --- dpcpp/matrix/csr_kernels.dp.cpp | 305 +++++++++++++++++++++++++++++++- 1 file changed, 304 insertions(+), 1 deletion(-) diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index f05692c1929..ab57b3c072e 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -1170,6 +1170,201 @@ void inv_nonsymm_permute_kernel( }); } + +template +void row_scale_permute_kernel(size_type num_rows, + const ValueType* __restrict__ scale, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, + ValueType* __restrict__ out_vals, + sycl::nd_item<3> item_ct1) +{ + auto tid = thread::get_subwarp_id_flat(item_ct1); + if (tid >= num_rows) { + return; + } + auto lane = item_ct1.get_local_id(2) % subgroup_size; + auto in_row = permutation[tid]; + auto out_row = tid; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subgroup_size) { + out_cols[out_begin + i] = in_cols[in_begin + i]; + out_vals[out_begin + i] = in_vals[in_begin + i] * scale[in_row]; + } +} + +template +void row_scale_permute_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, + size_type num_rows, const ValueType* scale, const IndexType* permutation, + const IndexType* in_row_ptrs, const IndexType* in_cols, + const ValueType* in_vals, const IndexType* out_row_ptrs, + IndexType* out_cols, ValueType* out_vals) +{ + queue->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + row_scale_permute_kernel( + num_rows, scale, permutation, in_row_ptrs, in_cols, in_vals, + out_row_ptrs, out_cols, out_vals, item_ct1); + }); + }); +} + + +template +void inv_row_scale_permute_kernel(size_type num_rows, + const ValueType* __restrict__ scale, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, + ValueType* __restrict__ out_vals, + sycl::nd_item<3> item_ct1) +{ + auto tid = thread::get_subwarp_id_flat(item_ct1); + if (tid >= num_rows) { + return; + } + auto lane = item_ct1.get_local_id(2) % subgroup_size; + auto in_row = tid; + auto out_row = permutation[tid]; + 
auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subgroup_size) { + out_cols[out_begin + i] = in_cols[in_begin + i]; + out_vals[out_begin + i] = in_vals[in_begin + i] / scale[out_row]; + } +} + +template +void inv_row_scale_permute_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, + size_type num_rows, const ValueType* scale, const IndexType* permutation, + const IndexType* in_row_ptrs, const IndexType* in_cols, + const ValueType* in_vals, const IndexType* out_row_ptrs, + IndexType* out_cols, ValueType* out_vals) +{ + queue->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + inv_row_scale_permute_kernel( + num_rows, scale, permutation, in_row_ptrs, in_cols, in_vals, + out_row_ptrs, out_cols, out_vals, item_ct1); + }); + }); +} + + +template +void inv_symm_scale_permute_kernel(size_type num_rows, + const ValueType* __restrict__ scale, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, + ValueType* __restrict__ out_vals, + sycl::nd_item<3> item_ct1) +{ + auto tid = thread::get_subwarp_id_flat(item_ct1); + if (tid >= num_rows) { + return; + } + auto lane = item_ct1.get_local_id(2) % subgroup_size; + auto in_row = tid; + auto out_row = permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subgroup_size) { + const auto out_col = permutation[in_cols[in_begin + i]]; + out_cols[out_begin + i] = out_col; + out_vals[out_begin + i] = + in_vals[in_begin + i] / (scale[out_row] * scale[out_col]); + } +} + + +template +void inv_nonsymm_scale_permute_kernel( + size_type num_rows, const ValueType* __restrict__ row_scale, + const IndexType* __restrict__ row_permutation, + const ValueType* __restrict__ col_scale, + const IndexType* __restrict__ col_permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals, + sycl::nd_item<3> item_ct1) +{ + auto tid = thread::get_subwarp_id_flat(item_ct1); + if (tid >= num_rows) { + return; + } + auto lane = item_ct1.get_local_id(2) % subgroup_size; + auto in_row = tid; + auto out_row = row_permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subgroup_size) { + const auto out_col = col_permutation[in_cols[in_begin + i]]; + out_cols[out_begin + i] = out_col; + out_vals[out_begin + i] = + in_vals[in_begin + i] / (row_scale[out_row] * col_scale[out_col]); + } +} + +template +void inv_symm_scale_permute_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, + size_type num_rows, const ValueType* scale, const IndexType* permutation, + const IndexType* in_row_ptrs, const IndexType* in_cols, + const ValueType* in_vals, const IndexType* out_row_ptrs, + IndexType* out_cols, ValueType* out_vals) +{ + queue->submit([&](sycl::handler& cgh) { + 
cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + inv_symm_scale_permute_kernel( + num_rows, scale, permutation, in_row_ptrs, in_cols, in_vals, + out_row_ptrs, out_cols, out_vals, item_ct1); + }); + }); +} + +template +void inv_nonsymm_scale_permute_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, + size_type num_rows, const ValueType* row_scale, + const IndexType* row_permutation, const ValueType* col_scale, + const IndexType* col_permutation, const IndexType* in_row_ptrs, + const IndexType* in_cols, const ValueType* in_vals, + const IndexType* out_row_ptrs, IndexType* out_cols, ValueType* out_vals) +{ + queue->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + inv_nonsymm_scale_permute_kernel( + num_rows, row_scale, row_permutation, col_scale, + col_permutation, in_row_ptrs, in_cols, in_vals, + out_row_ptrs, out_cols, out_vals, item_ct1); + }); + }); +} + namespace host_kernel { @@ -2329,7 +2524,7 @@ void inv_nonsymm_permute(std::shared_ptr exec, num_rows + 1); auto copy_num_blocks = ceildiv(num_rows, default_block_size / config::warp_size); - inv_symm_permute_kernel( + inv_nonsymm_permute_kernel( copy_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, row_perm, col_perm, orig->get_const_row_ptrs(), orig->get_const_col_idxs(), orig->get_const_values(), @@ -2393,6 +2588,114 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_INV_ROW_PERMUTE_KERNEL); +template +void inv_symm_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + inv_row_ptr_permute_kernel( + count_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + perm, orig->get_const_row_ptrs(), permuted->get_row_ptrs()); + components::prefix_sum_nonnegative(exec, permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + inv_symm_scale_permute_kernel( + copy_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + scale, perm, orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + orig->get_const_values(), permuted->get_row_ptrs(), + permuted->get_col_idxs(), permuted->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_SCALE_PERMUTE_KERNEL); + + +template +void inv_nonsymm_scale_permute(std::shared_ptr exec, + const ValueType* row_scale, + const IndexType* row_perm, + const ValueType* col_scale, + const IndexType* col_perm, + const matrix::Csr* orig, + matrix::Csr* permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + inv_row_ptr_permute_kernel( + count_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + row_perm, orig->get_const_row_ptrs(), permuted->get_row_ptrs()); + components::prefix_sum_nonnegative(exec, permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + inv_nonsymm_scale_permute_kernel( + copy_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + row_scale, row_perm, col_scale, col_perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), orig->get_const_values(), + permuted->get_row_ptrs(), permuted->get_col_idxs(), + permuted->get_values()); +} + 
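// Note: the scale-permute kernels above follow the SP = P * S convention
// introduced earlier in this series (scaling applied before permutation).
// The forward row variant computes B(i, :) = scale[perm[i]] * A(perm[i], :),
// i.e. B = (P S) A, while the inverse row variant computes
// B(perm[i], :) = A(i, :) / scale[perm[i]], i.e. B = (P S)^-1 A; the
// symmetric and nonsymmetric variants apply the analogous (inverse) scaling
// and permutation to the column indices as well.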
+GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_NONSYMM_SCALE_PERMUTE_KERNEL); + + +template +void row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + row_ptr_permute_kernel( + count_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + perm, orig->get_const_row_ptrs(), row_permuted->get_row_ptrs()); + components::prefix_sum_nonnegative(exec, row_permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + row_scale_permute_kernel( + copy_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + scale, perm, orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + orig->get_const_values(), row_permuted->get_row_ptrs(), + row_permuted->get_col_idxs(), row_permuted->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ROW_SCALE_PERMUTE_KERNEL); + + +template +void inv_row_scale_permute(std::shared_ptr exec, + const ValueType* scale, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + inv_row_ptr_permute_kernel( + count_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + perm, orig->get_const_row_ptrs(), row_permuted->get_row_ptrs()); + components::prefix_sum_nonnegative(exec, row_permuted->get_row_ptrs(), + num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + inv_row_scale_permute_kernel( + copy_num_blocks, default_block_size, 0, exec->get_queue(), num_rows, + scale, perm, orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + orig->get_const_values(), row_permuted->get_row_ptrs(), + row_permuted->get_col_idxs(), row_permuted->get_values()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_ROW_SCALE_PERMUTE_KERNEL); + + template void sort_by_column_index(std::shared_ptr exec, matrix::Csr* to_sort) From 03a7288907123fee7edb6ef159ea7d670ad5889e Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 12 Oct 2023 22:54:01 +0200 Subject: [PATCH 438/583] add permutation combination functions --- common/unified/matrix/permutation_kernels.cpp | 18 +++++ .../matrix/scaled_permutation_kernels.cpp | 37 +++++++-- core/device_hooks/common_kernels.inc.cpp | 2 + core/matrix/permutation.cpp | 38 +++++++-- core/matrix/permutation_kernels.hpp | 16 +++- core/matrix/scaled_permutation.cpp | 78 +++++++++++++++++-- core/matrix/scaled_permutation_kernels.hpp | 25 ++++-- include/ginkgo/core/matrix/permutation.hpp | 12 +++ .../ginkgo/core/matrix/scaled_permutation.hpp | 62 ++++++++++----- reference/matrix/permutation_kernels.cpp | 14 ++++ .../matrix/scaled_permutation_kernels.cpp | 26 ++++++- reference/test/matrix/permutation.cpp | 48 +++++++++++- reference/test/matrix/scaled_permutation.cpp | 63 +++++++++++++++ test/matrix/permutation_kernels.cpp | 17 +++- test/matrix/scaled_permutation_kernels.cpp | 21 ++++- 15 files changed, 422 insertions(+), 55 deletions(-) diff --git a/common/unified/matrix/permutation_kernels.cpp b/common/unified/matrix/permutation_kernels.cpp index 58b82c1602e..d94620aca75 100644 --- a/common/unified/matrix/permutation_kernels.cpp +++ b/common/unified/matrix/permutation_kernels.cpp @@ -61,6 +61,24 @@ void 
invert(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PERMUTATION_INVERT_KERNEL); +template +void combine(std::shared_ptr exec, + const IndexType* first_permutation, + const IndexType* second_permutation, size_type size, + IndexType* output_permutation) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto first_permutation, auto second_permutation, + auto output_permutation) { + output_permutation[i] = second_permutation[first_permutation[i]]; + }, + size, first_permutation, second_permutation, output_permutation); +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PERMUTATION_COMBINE_KERNEL); + + } // namespace permutation } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/common/unified/matrix/scaled_permutation_kernels.cpp b/common/unified/matrix/scaled_permutation_kernels.cpp index 27a70e6c8ab..46219d45d66 100644 --- a/common/unified/matrix/scaled_permutation_kernels.cpp +++ b/common/unified/matrix/scaled_permutation_kernels.cpp @@ -47,25 +47,50 @@ namespace scaled_permutation { template void invert(std::shared_ptr exec, - const IndexType* input_permutation, const ValueType* input_scale, - size_type size, IndexType* output_permutation, - ValueType* output_scale) + const ValueType* input_scale, const IndexType* input_permutation, + size_type size, ValueType* output_scale, + IndexType* output_permutation) { run_kernel( exec, - [] GKO_KERNEL(auto i, auto input_permutation, auto input_scale, - auto output_permutation, auto output_scale) { + [] GKO_KERNEL(auto i, auto input_scale, auto input_permutation, + auto output_scale, auto output_permutation) { const auto ip = input_permutation[i]; output_permutation[ip] = i; output_scale[i] = one(input_scale[ip]) / input_scale[ip]; }, - size, input_permutation, input_scale, output_permutation, output_scale); + size, input_scale, input_permutation, output_scale, output_permutation); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL); +template +void combine(std::shared_ptr exec, + const ValueType* first_scale, const IndexType* first_permutation, + const ValueType* second_scale, const IndexType* second_permutation, + size_type size, ValueType* output_scale, + IndexType* output_permutation) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto first_scale, auto first_permutation, + auto second_scale, auto second_permutation, + auto output_permutation, auto output_scale) { + const auto first_permuted = first_permutation[i]; + output_permutation[i] = second_permutation[first_permuted]; + output_scale[first_permuted] = + first_scale[first_permuted] * second_scale[i]; + }, + size, first_scale, first_permutation, second_scale, second_permutation, + output_permutation, output_scale); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_SCALED_PERMUTATION_COMBINE_KERNEL); + + } // namespace scaled_permutation } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 3f5d097abac..0c58f1a4c0f 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -730,6 +730,7 @@ namespace permutation { GKO_STUB_INDEX_TYPE(GKO_DECLARE_PERMUTATION_INVERT_KERNEL); +GKO_STUB_INDEX_TYPE(GKO_DECLARE_PERMUTATION_COMBINE_KERNEL); } // namespace permutation @@ -739,6 +740,7 @@ namespace scaled_permutation { GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL); 
+GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SCALED_PERMUTATION_COMBINE_KERNEL); } // namespace scaled_permutation diff --git a/core/matrix/permutation.cpp b/core/matrix/permutation.cpp index 779bdd964bb..00115d0db68 100644 --- a/core/matrix/permutation.cpp +++ b/core/matrix/permutation.cpp @@ -31,12 +31,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ #include + + +#include +#include +#include +#include +#include + + #include "core/base/dispatch_helper.hpp" #include "core/matrix/permutation_kernels.hpp" -#include "ginkgo/core/base/exception_helpers.hpp" -#include "ginkgo/core/base/executor.hpp" -#include "ginkgo/core/base/precision_dispatch.hpp" -#include "ginkgo/core/base/utils_helper.hpp" namespace gko { @@ -45,9 +50,10 @@ namespace permutation { GKO_REGISTER_OPERATION(invert, permutation::invert); +GKO_REGISTER_OPERATION(combine, permutation::combine); -} +} // namespace permutation template @@ -162,10 +168,26 @@ std::unique_ptr> Permutation::invert() const { const auto exec = this->get_executor(); const auto size = this->get_size()[0]; - array inv_permutation{exec, size}; + auto result = Permutation::create(exec, size); exec->run(permutation::make_invert(this->get_const_permutation(), size, - inv_permutation.get_data())); - return Permutation::create(exec, std::move(inv_permutation)); + result->get_permutation())); + return result; +} + + +template +std::unique_ptr> Permutation::combine( + ptr_param> other) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(this, other); + const auto exec = this->get_executor(); + const auto size = this->get_size()[0]; + const auto local_other = make_temporary_clone(exec, other); + auto result = Permutation::create(exec, size); + exec->run(permutation::make_combine(this->get_const_permutation(), + local_other->get_const_permutation(), + size, result->get_permutation())); + return result; } diff --git a/core/matrix/permutation_kernels.hpp b/core/matrix/permutation_kernels.hpp index a77e0c2f618..d4deb4142ad 100644 --- a/core/matrix/permutation_kernels.hpp +++ b/core/matrix/permutation_kernels.hpp @@ -62,10 +62,18 @@ namespace kernels { const IndexType* permutation_indices, size_type size, \ IndexType* inv_permutation) - -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_PERMUTATION_INVERT_KERNEL(IndexType) +#define GKO_DECLARE_PERMUTATION_COMBINE_KERNEL(IndexType) \ + void combine(std::shared_ptr exec, \ + const IndexType* first_permutation, \ + const IndexType* second_permutation, size_type size, \ + IndexType* combined_permutation) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_PERMUTATION_INVERT_KERNEL(IndexType); \ + template \ + GKO_DECLARE_PERMUTATION_COMBINE_KERNEL(IndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(permutation, diff --git a/core/matrix/scaled_permutation.cpp b/core/matrix/scaled_permutation.cpp index b6545ee68b4..f3de34bd9b6 100644 --- a/core/matrix/scaled_permutation.cpp +++ b/core/matrix/scaled_permutation.cpp @@ -32,6 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include "core/matrix/scaled_permutation_kernels.hpp" +#include "ginkgo/core/base/exception_helpers.hpp" #include "ginkgo/core/base/executor.hpp" #include "ginkgo/core/base/precision_dispatch.hpp" @@ -43,6 +44,7 @@ namespace { GKO_REGISTER_OPERATION(invert, scaled_permutation::invert); +GKO_REGISTER_OPERATION(combine, scaled_permutation::combine); } // namespace @@ -71,19 +73,83 @@ ScaledPermutation::ScaledPermutation( } +template +std::unique_ptr> +ScaledPermutation::create( + std::shared_ptr exec, size_type size) +{ + return std::unique_ptr( + new ScaledPermutation{exec, size}); +} + + +template +std::unique_ptr> +ScaledPermutation::create( + ptr_param> permutation) +{ + const auto exec = permutation->get_executor(); + const auto size = permutation->get_size()[0]; + array scale{exec, size}; + array perm{exec, size}; + exec->copy(size, permutation->get_const_permutation(), perm.get_data()); + scale.fill(one()); + return create(exec, std::move(scale), std::move(perm)); +} + + +template +std::unique_ptr> +ScaledPermutation::create( + std::shared_ptr exec, array scaling_factors, + array permutation_indices) +{ + return std::unique_ptr(new ScaledPermutation{ + exec, std::move(scaling_factors), std::move(permutation_indices)}); +} + + +template +std::unique_ptr> +ScaledPermutation::create_const( + std::shared_ptr exec, + gko::detail::const_array_view&& scale, + gko::detail::const_array_view&& perm_idxs) +{ + return create(exec, gko::detail::array_const_cast(std::move(scale)), + gko::detail::array_const_cast(std::move(perm_idxs))); +} + + template std::unique_ptr> ScaledPermutation::invert() const { const auto exec = this->get_executor(); const auto size = this->get_size()[0]; - array inv_permutation{exec, size}; - array inv_scale{exec, size}; + auto result = ScaledPermutation::create(exec, size); exec->run(scaled_permutation::make_invert( - this->get_const_permutation(), this->get_const_scale(), size, - inv_permutation.get_data(), inv_scale.get_data())); - return ScaledPermutation::create(exec, std::move(inv_scale), - std::move(inv_permutation)); + this->get_const_scale(), this->get_const_permutation(), size, + result->get_scale(), result->get_permutation())); + return result; +} + + +template +std::unique_ptr> +ScaledPermutation::combine( + ptr_param other) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(this, other); + const auto exec = this->get_executor(); + const auto size = this->get_size()[0]; + const auto local_other = make_temporary_clone(exec, other); + auto result = ScaledPermutation::create(exec, size); + exec->run(scaled_permutation::make_combine( + this->get_const_scale(), this->get_const_permutation(), + local_other->get_const_scale(), local_other->get_const_permutation(), + size, result->get_scale(), result->get_permutation())); + return result; } diff --git a/core/matrix/scaled_permutation_kernels.hpp b/core/matrix/scaled_permutation_kernels.hpp index 905321ea885..360e1a947f1 100644 --- a/core/matrix/scaled_permutation_kernels.hpp +++ b/core/matrix/scaled_permutation_kernels.hpp @@ -45,13 +45,24 @@ namespace kernels { #define GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL(ValueType, IndexType) \ void invert(std::shared_ptr exec, \ - const IndexType* input_permutation, \ - const ValueType* input_scale, size_type size, \ - IndexType* output_permutation, ValueType* output_scale) - -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL(ValueType, IndexType) + const ValueType* input_scale, \ + const IndexType* input_permutation, size_type 
size, \ + ValueType* output_scale, IndexType* output_permutation) + +#define GKO_DECLARE_SCALED_PERMUTATION_COMBINE_KERNEL(ValueType, IndexType) \ + void combine(std::shared_ptr exec, \ + const ValueType* first_scale, \ + const IndexType* first_permutation, \ + const ValueType* second_scale, \ + const IndexType* second_permutation, size_type size, \ + ValueType* output_scale, IndexType* output_permutation) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_SCALED_PERMUTATION_COMBINE_KERNEL(ValueType, IndexType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(scaled_permutation, diff --git a/include/ginkgo/core/matrix/permutation.hpp b/include/ginkgo/core/matrix/permutation.hpp index abfffb11248..02401cf698e 100644 --- a/include/ginkgo/core/matrix/permutation.hpp +++ b/include/ginkgo/core/matrix/permutation.hpp @@ -202,6 +202,18 @@ class Permutation : public EnableLinOp>, */ std::unique_ptr invert() const; + /** + * Combines this permutation with another permutation via composition. + * The resulting permutation fulfills `result[i] = other[this[i]]` + * or `result = other * this` from the matrix perspective, which is + * equivalent to first permuting by `this` and then by `other`. + * + * @param other the other permutation + * @return the combined permutation + */ + std::unique_ptr combine( + ptr_param other) const; + void write(gko::matrix_data& data) const override; /** diff --git a/include/ginkgo/core/matrix/scaled_permutation.hpp b/include/ginkgo/core/matrix/scaled_permutation.hpp index 46d17ecbb75..5008590bcc1 100644 --- a/include/ginkgo/core/matrix/scaled_permutation.hpp +++ b/include/ginkgo/core/matrix/scaled_permutation.hpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include namespace gko { @@ -63,9 +64,7 @@ namespace matrix { template class ScaledPermutation : public EnableLinOp>, - public EnableCreateMethod>, public WritableToMatrixData { - friend class EnableCreateMethod; friend class EnablePolymorphicObject; public: @@ -118,8 +117,51 @@ class ScaledPermutation */ std::unique_ptr invert() const; + /** + * Combines this scaled permutation with another scaled permutation via + * composition. This means `result = other * this` from the matrix + * perspective, which is equivalent to first scaling and permuting by `this` + * and then by `other`. + * + * @param other the other permutation + * @return the combined permutation + */ + std::unique_ptr combine( + ptr_param other) const; + void write(gko::matrix_data& data) const override; + /** + * Creates an uninitialized ScaledPermutation matrix. + * + * @param exec Executor associated to the matrix + * @param size dimensions of the (square) scaled permutation matrix + */ + static std::unique_ptr create( + std::shared_ptr exec, size_type size = 0); + + /** + * Create a ScaledPermutation from a Permutation. + * The permutation will be copied, the scaling factors are all set to 1.0. + * + * @param permutation the permutation + * @return the scaled permutation. + */ + static std::unique_ptr create( + ptr_param> permutation); + + /** + * Creates a ScaledPermutation matrix from already allocated (and + * initialized) arrays. 
+ * + * @param exec Executor associated to the matrix + * @param permutation_indices array of permutation indices + * @param scaling_factors array of scaling factors + */ + static std::unique_ptr create( + std::shared_ptr exec, array scaling_factors, + array permutation_indices); + /** * Creates a constant (immutable) ScaledPermutation matrix from constant * arrays. @@ -137,33 +179,17 @@ class ScaledPermutation gko::detail::const_array_view&& perm_idxs); protected: - /** - * Creates an uninitialized ScaledPermutation matrix. - * - * @param exec Executor associated to the matrix - * @param size dimensions of the (square) scaled permutation matrix - */ ScaledPermutation(std::shared_ptr exec, size_type size = 0); - /** - * Creates a ScaledPermutation matrix from already allocated (and - * initialized) arrays. - * - * @param exec Executor associated to the matrix - * @param permutation_indices array of permutation indices - * @param scaling_factors array of scaling factors - */ ScaledPermutation(std::shared_ptr exec, array scaling_factors, array permutation_indices); void apply_impl(const LinOp* in, LinOp* out) const override; - void apply_impl(const LinOp*, const LinOp* in, const LinOp*, LinOp* out) const override; - private: array scale_; array permutation_; diff --git a/reference/matrix/permutation_kernels.cpp b/reference/matrix/permutation_kernels.cpp index cc7a81a1044..7d295394904 100644 --- a/reference/matrix/permutation_kernels.cpp +++ b/reference/matrix/permutation_kernels.cpp @@ -52,6 +52,20 @@ void invert(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PERMUTATION_INVERT_KERNEL); +template +void combine(std::shared_ptr exec, + const IndexType* first_permutation, + const IndexType* second_permutation, size_type size, + IndexType* output_permutation) +{ + for (size_type i = 0; i < size; i++) { + output_permutation[i] = second_permutation[first_permutation[i]]; + } +} + +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PERMUTATION_COMBINE_KERNEL); + + } // namespace permutation } // namespace reference } // namespace kernels diff --git a/reference/matrix/scaled_permutation_kernels.cpp b/reference/matrix/scaled_permutation_kernels.cpp index f0b83128c66..e1d418204e8 100644 --- a/reference/matrix/scaled_permutation_kernels.cpp +++ b/reference/matrix/scaled_permutation_kernels.cpp @@ -44,9 +44,9 @@ namespace scaled_permutation { template void invert(std::shared_ptr exec, - const IndexType* input_permutation, const ValueType* input_scale, - size_type size, IndexType* output_permutation, - ValueType* output_scale) + const ValueType* input_scale, const IndexType* input_permutation, + size_type size, ValueType* output_scale, + IndexType* output_permutation) { for (size_type i = 0; i < size; i++) { const auto ip = input_permutation[i]; @@ -59,6 +59,26 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SCALED_PERMUTATION_INVERT_KERNEL); +template +void combine(std::shared_ptr exec, + const ValueType* first_scale, const IndexType* first_permutation, + const ValueType* second_scale, const IndexType* second_permutation, + size_type size, ValueType* output_scale, + IndexType* output_permutation) +{ + // P_2 S_2 P_1 S_1 = P_2 P_1 S'_2 S_1 with S'_2 = P_1^-1 S_2 P_1^-T + for (size_type i = 0; i < size; i++) { + const auto first_permuted = first_permutation[i]; + output_permutation[i] = second_permutation[first_permuted]; + output_scale[first_permuted] = + first_scale[first_permuted] * second_scale[i]; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + 
GKO_DECLARE_SCALED_PERMUTATION_COMBINE_KERNEL); + + } // namespace scaled_permutation } // namespace reference } // namespace kernels diff --git a/reference/test/matrix/permutation.cpp b/reference/test/matrix/permutation.cpp index 1301276a424..cd260ef6301 100644 --- a/reference/test/matrix/permutation.cpp +++ b/reference/test/matrix/permutation.cpp @@ -38,9 +38,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include "core/test/utils.hpp" +#include "ginkgo/core/base/exception.hpp" namespace { @@ -78,11 +80,55 @@ TYPED_TEST(Permutation, Invert) } +TYPED_TEST(Permutation, Combine) +{ + using index_type = typename TestFixture::index_type; + auto perm = gko::matrix::Permutation::create( + this->exec, gko::array{this->exec, {1, 2, 0}}); + auto perm2 = gko::matrix::Permutation::create( + this->exec, gko::array{this->exec, {0, 2, 1}}); + + auto combined = perm->combine(perm2); + + EXPECT_EQ(combined->get_const_permutation()[0], 2); + EXPECT_EQ(combined->get_const_permutation()[1], 1); + EXPECT_EQ(combined->get_const_permutation()[2], 0); +} + + +TYPED_TEST(Permutation, CombineWithInverse) +{ + using index_type = typename TestFixture::index_type; + const gko::size_type size = 20; + auto perm = gko::matrix::Permutation::create(this->exec, size); + std::iota(perm->get_permutation(), perm->get_permutation() + size, 0); + std::shuffle(perm->get_permutation(), perm->get_permutation() + size, + std::default_random_engine{29584}); + + auto combined = perm->combine(perm->invert()); + + for (index_type i = 0; i < size; i++) { + ASSERT_EQ(combined->get_const_permutation()[i], i); + } +} + + +TYPED_TEST(Permutation, CombineFailsWithMismatchingSize) +{ + using index_type = typename TestFixture::index_type; + auto perm = gko::matrix::Permutation::create( + this->exec, gko::array{this->exec, {1, 2, 0}}); + auto perm0 = gko::matrix::Permutation::create(this->exec); + + ASSERT_THROW(perm->combine(perm0), gko::DimensionMismatch); +} + + TYPED_TEST(Permutation, Write) { using index_type = typename TestFixture::index_type; auto perm = gko::matrix::Permutation::create( - this->exec, 3, gko::array{this->exec, {1, 2, 0}}); + this->exec, gko::array{this->exec, {1, 2, 0}}); GKO_ASSERT_MTX_NEAR( perm, l({{0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}, {1.0, 0.0, 0.0}}), diff --git a/reference/test/matrix/scaled_permutation.cpp b/reference/test/matrix/scaled_permutation.cpp index d2968692761..9ca0bb26d8c 100644 --- a/reference/test/matrix/scaled_permutation.cpp +++ b/reference/test/matrix/scaled_permutation.cpp @@ -90,6 +90,69 @@ TYPED_TEST(ScaledPermutation, Invert) } +TYPED_TEST(ScaledPermutation, CreateFromPermutation) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = typename TestFixture::Mtx; + auto non_scaled = gko::matrix::Permutation::create( + this->exec, gko::array{this->exec, {1, 2, 0}}); + + auto scaled = Mtx::create(non_scaled); + + EXPECT_EQ(scaled->get_const_permutation()[0], 1); + EXPECT_EQ(scaled->get_const_permutation()[1], 2); + EXPECT_EQ(scaled->get_const_permutation()[2], 0); + EXPECT_EQ(scaled->get_const_scale()[0], gko::one()); + EXPECT_EQ(scaled->get_const_scale()[1], gko::one()); + EXPECT_EQ(scaled->get_const_scale()[2], gko::one()); +} + + +TYPED_TEST(ScaledPermutation, Combine) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using Mtx = typename TestFixture::Mtx; + auto other_perm = Mtx::create( + this->exec, 
gko::array{this->exec, {3.0, 5.0, 7.0}}, + gko::array{this->exec, {1, 0, 2}}); + + auto combined = this->perm3->combine(other_perm); + + EXPECT_EQ(combined->get_const_permutation()[0], 0); + EXPECT_EQ(combined->get_const_permutation()[1], 2); + EXPECT_EQ(combined->get_const_permutation()[2], 1); + EXPECT_EQ(combined->get_const_scale()[0], value_type{7}); + EXPECT_EQ(combined->get_const_scale()[1], value_type{6}); + EXPECT_EQ(combined->get_const_scale()[2], value_type{20}); +} + + +TYPED_TEST(ScaledPermutation, CombineWithInverse) +{ + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + const gko::size_type size = 20; + auto rng = std::default_random_engine{3754}; + auto perm = gko::matrix::Permutation::create(this->exec, size); + std::iota(perm->get_permutation(), perm->get_permutation() + size, 0); + std::shuffle(perm->get_permutation(), perm->get_permutation() + size, rng); + + auto combined = perm->combine(perm->invert()); + + for (index_type i = 0; i < size; i++) { + ASSERT_EQ(combined->get_const_permutation()[i], i); + } +} + + +TYPED_TEST(ScaledPermutation, CombineFailsWithMismatchingSize) +{ + ASSERT_THROW(this->perm3->combine(this->perm2), gko::DimensionMismatch); +} + + TYPED_TEST(ScaledPermutation, Write) { using T = typename TestFixture::value_type; diff --git a/test/matrix/permutation_kernels.cpp b/test/matrix/permutation_kernels.cpp index 037040b8fd4..53086478547 100644 --- a/test/matrix/permutation_kernels.cpp +++ b/test/matrix/permutation_kernels.cpp @@ -52,14 +52,20 @@ class Permutation : public CommonTestFixture { { std::vector tmp(1000, 0); std::iota(tmp.begin(), tmp.end(), 0); + auto tmp2 = tmp; std::shuffle(tmp.begin(), tmp.end(), rand_engine); - permutation = Perm::create(ref, tmp.size(), gko::array(ref, tmp.begin(), tmp.end())); + std::shuffle(tmp2.begin(), tmp2.end(), rand_engine); + permutation = Perm::create( + ref, gko::array(ref, tmp.begin(), tmp.end())); + permutation2 = Perm::create( + ref, gko::array(ref, tmp2.begin(), tmp2.end())); dpermutation = permutation->clone(exec); } std::default_random_engine rand_engine; std::unique_ptr permutation; + std::unique_ptr permutation2; std::unique_ptr dpermutation; }; @@ -71,3 +77,12 @@ TEST_F(Permutation, InvertIsEquivalentToRef) GKO_ASSERT_MTX_EQ_SPARSITY(inv, dinv); } + + +TEST_F(Permutation, CombineIsEquivalentToRef) +{ + auto combined = permutation->combine(permutation2); + auto dcombined = dpermutation->combine(permutation2); + + GKO_ASSERT_MTX_EQ_SPARSITY(combined, dcombined); +} diff --git a/test/matrix/scaled_permutation_kernels.cpp b/test/matrix/scaled_permutation_kernels.cpp index d85b9735abc..688788fb64a 100644 --- a/test/matrix/scaled_permutation_kernels.cpp +++ b/test/matrix/scaled_permutation_kernels.cpp @@ -52,18 +52,28 @@ class ScaledPermutation : public CommonTestFixture { { std::vector tmp(1000, 0); std::iota(tmp.begin(), tmp.end(), 0); + auto tmp2 = tmp; std::shuffle(tmp.begin(), tmp.end(), rand_engine); + std::shuffle(tmp2.begin(), tmp2.end(), rand_engine); std::vector scale(tmp.size()); + std::vector scale2(tmp2.size()); std::uniform_real_distribution dist(1, 2); auto gen = [&] { return dist(rand_engine); }; std::generate(scale.begin(), scale.end(), gen); - permutation = ScaledPerm::create(ref, gko::array(ref, scale.begin(), scale.end()), gko::array(ref, tmp.begin(), tmp.end())); + std::generate(scale2.begin(), scale2.end(), gen); + permutation = ScaledPerm::create( + ref, gko::array(ref, scale.begin(), scale.end()), + gko::array(ref, tmp.begin(), 
tmp.end())); + permutation2 = ScaledPerm::create( + ref, gko::array(ref, scale2.begin(), scale2.end()), + gko::array(ref, tmp2.begin(), tmp2.end())); dpermutation = permutation->clone(exec); } std::default_random_engine rand_engine; std::unique_ptr permutation; + std::unique_ptr permutation2; std::unique_ptr dpermutation; }; @@ -75,3 +85,12 @@ TEST_F(ScaledPermutation, InvertIsEquivalentToRef) GKO_ASSERT_MTX_NEAR(inv, dinv, r::value); } + + +TEST_F(ScaledPermutation, CombineIsEquivalentToRef) +{ + auto combined = permutation->combine(permutation2); + auto dcombined = dpermutation->combine(permutation2); + + GKO_ASSERT_MTX_NEAR(combined, dcombined, r::value); +} From 430769a51513f574c43ddcce559e1e30f760646c Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Thu, 12 Oct 2023 20:56:31 +0000 Subject: [PATCH 439/583] Format files Co-authored-by: Tobias Ribizel --- core/matrix/dense.cpp | 2 +- core/matrix/permutation_kernels.hpp | 4 +--- core/matrix/scaled_permutation.cpp | 10 +++++++--- core/matrix/scaled_permutation_kernels.hpp | 1 + reference/test/matrix/permutation.cpp | 6 ++++-- reference/test/matrix/scaled_permutation.cpp | 4 ++-- test/matrix/csr_kernels2.cpp | 15 +++++++-------- test/matrix/permutation_kernels.cpp | 6 +++--- test/matrix/scaled_permutation_kernels.cpp | 6 +++--- 9 files changed, 29 insertions(+), 25 deletions(-) diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 05b5672117b..0f5a9397f27 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -43,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -60,7 +61,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/dense_kernels.hpp" #include "core/matrix/hybrid_kernels.hpp" -#include "ginkgo/core/base/temporary_clone.hpp" namespace gko { diff --git a/core/matrix/permutation_kernels.hpp b/core/matrix/permutation_kernels.hpp index d4deb4142ad..b5186fdaaf9 100644 --- a/core/matrix/permutation_kernels.hpp +++ b/core/matrix/permutation_kernels.hpp @@ -34,13 +34,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_CORE_MATRIX_PERMUTATION_KERNELS_HPP_ -#include - - #include #include #include #include +#include #include #include #include diff --git a/core/matrix/scaled_permutation.cpp b/core/matrix/scaled_permutation.cpp index f3de34bd9b6..cb3b5f9bf6c 100644 --- a/core/matrix/scaled_permutation.cpp +++ b/core/matrix/scaled_permutation.cpp @@ -31,10 +31,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ #include + + +#include +#include +#include + + #include "core/matrix/scaled_permutation_kernels.hpp" -#include "ginkgo/core/base/exception_helpers.hpp" -#include "ginkgo/core/base/executor.hpp" -#include "ginkgo/core/base/precision_dispatch.hpp" namespace gko { diff --git a/core/matrix/scaled_permutation_kernels.hpp b/core/matrix/scaled_permutation_kernels.hpp index 360e1a947f1..9aa5421fd07 100644 --- a/core/matrix/scaled_permutation_kernels.hpp +++ b/core/matrix/scaled_permutation_kernels.hpp @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef GKO_CORE_MATRIX_SCALED_PERMUTATION_KERNELS_HPP_ #define GKO_CORE_MATRIX_SCALED_PERMUTATION_KERNELS_HPP_ + #include diff --git a/reference/test/matrix/permutation.cpp b/reference/test/matrix/permutation.cpp index cd260ef6301..6d19fa7a9e3 100644 --- a/reference/test/matrix/permutation.cpp +++ b/reference/test/matrix/permutation.cpp @@ -33,16 +33,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #include +#include #include #include -#include #include "core/test/utils.hpp" -#include "ginkgo/core/base/exception.hpp" namespace { diff --git a/reference/test/matrix/scaled_permutation.cpp b/reference/test/matrix/scaled_permutation.cpp index 9ca0bb26d8c..8a5fbe9f6c5 100644 --- a/reference/test/matrix/scaled_permutation.cpp +++ b/reference/test/matrix/scaled_permutation.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include #include @@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include +#include #include "core/test/utils.hpp" diff --git a/test/matrix/csr_kernels2.cpp b/test/matrix/csr_kernels2.cpp index 9e8355c284d..0884e203733 100644 --- a/test/matrix/csr_kernels2.cpp +++ b/test/matrix/csr_kernels2.cpp @@ -918,14 +918,11 @@ TEST_F(Csr, IsGenericPermutableRectangular) auto rpermuted = mtx->permute(rpermutation, permute_mode::rows); auto drpermuted = dmtx->permute(rpermutation, permute_mode::rows); - auto irpermuted = - mtx->permute(rpermutation, permute_mode::inverse_rows); - auto dirpermuted = - dmtx->permute(rpermutation, permute_mode::inverse_rows); + auto irpermuted = mtx->permute(rpermutation, permute_mode::inverse_rows); + auto dirpermuted = dmtx->permute(rpermutation, permute_mode::inverse_rows); auto cpermuted = mtx->permute(cpermutation, permute_mode::columns); auto dcpermuted = dmtx->permute(cpermutation, permute_mode::columns); - auto icpermuted = - mtx->permute(cpermutation, permute_mode::inverse_columns); + auto icpermuted = mtx->permute(cpermutation, permute_mode::inverse_columns); auto dicpermuted = dmtx->permute(cpermutation, permute_mode::inverse_columns); @@ -1021,8 +1018,10 @@ TEST_F(Csr, IsNonsymmScalePermutable) for (auto invert : {false, true}) { SCOPED_TRACE(invert); - auto permuted = mtx->scale_permute(srpermutation, scpermutation, invert); - auto dpermuted = dmtx->scale_permute(srpermutation, scpermutation, invert); + auto permuted = + mtx->scale_permute(srpermutation, scpermutation, invert); + auto dpermuted = + dmtx->scale_permute(srpermutation, scpermutation, invert); GKO_EXPECT_MTX_NEAR(permuted, dpermuted, r::value); GKO_EXPECT_MTX_EQ_SPARSITY(permuted, dpermuted); diff --git a/test/matrix/permutation_kernels.cpp b/test/matrix/permutation_kernels.cpp index 53086478547..f04d7a9e58b 100644 --- a/test/matrix/permutation_kernels.cpp +++ b/test/matrix/permutation_kernels.cpp @@ -30,9 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include - - #include #include @@ -40,6 +37,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 #include
+#include
+
+
 #include "core/test/utils.hpp"
 #include "test/utils/executor.hpp"
diff --git a/test/matrix/scaled_permutation_kernels.cpp b/test/matrix/scaled_permutation_kernels.cpp
index 688788fb64a..249ffe8867b 100644
--- a/test/matrix/scaled_permutation_kernels.cpp
+++ b/test/matrix/scaled_permutation_kernels.cpp
@@ -30,9 +30,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *************************************************************/
 
-#include
-
-
 #include
 #include
@@ -40,6 +37,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include
+#include
+
+
 #include "core/test/utils.hpp"
 #include "test/utils/executor.hpp"

From bb856dfeee9c8fcecabdac5ab49d1f645a4c5e0b Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Wed, 18 Oct 2023 13:11:45 +0200
Subject: [PATCH 440/583] fix warning

---
 core/factorization/factorization.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/core/factorization/factorization.cpp b/core/factorization/factorization.cpp
index 436359a417a..5877124bf77 100644
--- a/core/factorization/factorization.cpp
+++ b/core/factorization/factorization.cpp
@@ -112,6 +112,7 @@ Factorization::unpack() const
     }
     case storage_type::combined_ldu:
     case storage_type::symm_combined_ldl:
+    default:
         GKO_NOT_IMPLEMENTED;
     }
 }

From a7d4dff91fbdf1d31c834dea3a5205b82ae8ca35 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Tue, 24 Oct 2023 12:48:00 -0400
Subject: [PATCH 441/583] review updates

- fix and test csr column permutation for hypersparse matrices
- fix inverse nonsymm scaled permutation for dense
- extract permutation dimension validation
- simplify permutation dispatch
- re-add templated constructors to preserve interface
- move more code to source
- add documentation for permutation to permute_mode instead of matrix classes
- improve documentation
- make scaled_permutation final
- remove unnecessary InvalidValueError
- improve error message on dispatch_helper run(...)
- simplify rectangular permutation tests
- fix dense tests not calling scaled permutation
- test apply in (scaled) permutation on the device
- throw DimensionMismatch instead of ValueMismatch
- add more round-trip and inverted permutation tests
- add dpcpp subgroup size

Co-authored-by: Pratik Nayak
Co-authored-by: Yuhsiang M.
Tsai Co-authored-by: Marcel Koch --- common/unified/matrix/csr_kernels.cpp | 4 +- .../unified/matrix/dense_kernels.template.cpp | 2 +- core/base/dispatch_helper.hpp | 4 +- core/matrix/csr.cpp | 28 ++-- core/matrix/dense.cpp | 33 ++-- core/matrix/permutation.cpp | 106 +++++++++---- core/matrix/permutation.hpp | 59 +++++++ dpcpp/matrix/csr_kernels.dp.cpp | 56 ++++--- include/ginkgo/core/base/exception.hpp | 19 --- include/ginkgo/core/matrix/csr.hpp | 12 +- include/ginkgo/core/matrix/dense.hpp | 12 +- include/ginkgo/core/matrix/permutation.hpp | 122 ++++++++------- .../ginkgo/core/matrix/scaled_permutation.hpp | 14 +- omp/matrix/csr_kernels.cpp | 6 +- reference/matrix/csr_kernels.cpp | 6 +- reference/matrix/dense_kernels.cpp | 1 - reference/test/matrix/csr_kernels.cpp | 145 ++++++++++-------- reference/test/matrix/dense_kernels.cpp | 28 ++-- test/matrix/csr_kernels2.cpp | 123 +++++++++------ test/matrix/dense_kernels.cpp | 70 ++++----- test/matrix/permutation_kernels.cpp | 46 +++++- test/matrix/scaled_permutation_kernels.cpp | 45 +++++- 22 files changed, 557 insertions(+), 384 deletions(-) create mode 100644 core/matrix/permutation.hpp diff --git a/common/unified/matrix/csr_kernels.cpp b/common/unified/matrix/csr_kernels.cpp index 10c8d8cd08e..d1abb043c44 100644 --- a/common/unified/matrix/csr_kernels.cpp +++ b/common/unified/matrix/csr_kernels.cpp @@ -62,7 +62,7 @@ void inv_col_permute(std::shared_ptr exec, { auto num_rows = orig->get_size()[0]; auto nnz = orig->get_num_stored_elements(); - auto size = std::max(num_rows, nnz); + auto size = std::max(num_rows + 1, nnz); run_kernel( exec, [] GKO_KERNEL(auto tid, auto num_rows, auto num_nonzeros, @@ -95,7 +95,7 @@ void inv_col_scale_permute(std::shared_ptr exec, { auto num_rows = orig->get_size()[0]; auto nnz = orig->get_num_stored_elements(); - auto size = std::max(num_rows, nnz); + auto size = std::max(num_rows + 1, nnz); run_kernel( exec, [] GKO_KERNEL(auto tid, auto num_rows, auto num_nonzeros, auto scale, diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index c04f9c14d4c..ed508066ba8 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -602,7 +602,7 @@ void inv_nonsymm_scale_permute(std::shared_ptr exec, auto col_scale, auto col_perm, auto orig, auto permuted) { const auto row = row_perm[i]; const auto col = col_perm[j]; - permuted(row, col) = orig(i, j) / (row_scale[i] * col_scale[j]); + permuted(row, col) = orig(i, j) / (row_scale[row] * col_scale[col]); }, orig->get_size(), row_scale, row_perm, col_scale, col_perm, orig, permuted); diff --git a/core/base/dispatch_helper.hpp b/core/base/dispatch_helper.hpp index 2226ffc6b6d..9653c7f8622 100644 --- a/core/base/dispatch_helper.hpp +++ b/core/base/dispatch_helper.hpp @@ -97,9 +97,9 @@ void run(T obj, Func f, Args... args) */ template